/*-------------------------------------------------------------------------
 *
 * relcache.c
 *	  POSTGRES relation descriptor cache code
 *
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.266.2.9 2010/04/14 21:31:27 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
/*
 * INTERFACE ROUTINES
 *		RelationCacheInitialize			- initialize relcache (to empty)
 *		RelationCacheInitializePhase2	- finish initializing relcache
 *		RelationIdGetRelation			- get a reldesc by relation id
 *		RelationClose					- close an open relation
 *
 * NOTES
 *		The following code contains many undocumented hacks.  Please be
 *		careful....
 */
26 27
#include "postgres.h"

28
#include <sys/file.h>
29
#include <fcntl.h>
30
#include <unistd.h>
31

32 33
#include "access/genam.h"
#include "access/heapam.h"
34
#include "access/reloptions.h"
35
#include "access/xact.h"
36
#include "catalog/catalog.h"
37
#include "catalog/index.h"
B
Bruce Momjian 已提交
38
#include "catalog/indexing.h"
39
#include "catalog/namespace.h"
40 41
#include "catalog/pg_amop.h"
#include "catalog/pg_amproc.h"
B
Bruce Momjian 已提交
42
#include "catalog/pg_attrdef.h"
43
#include "catalog/pg_authid.h"
44
#include "catalog/pg_constraint.h"
45
#include "catalog/pg_namespace.h"
46
#include "catalog/pg_opclass.h"
47
#include "catalog/pg_operator.h"
B
Bruce Momjian 已提交
48
#include "catalog/pg_proc.h"
49
#include "catalog/pg_rewrite.h"
50
#include "catalog/pg_trigger.h"
51
#include "catalog/pg_type.h"
52
#include "commands/trigger.h"
B
Bruce Momjian 已提交
53
#include "miscadmin.h"
54 55
#include "optimizer/clauses.h"
#include "optimizer/planmain.h"
56
#include "optimizer/prep.h"
57
#include "optimizer/var.h"
58
#include "rewrite/rewriteDefine.h"
59
#include "storage/fd.h"
B
Bruce Momjian 已提交
60
#include "storage/smgr.h"
61
#include "utils/builtins.h"
62
#include "utils/fmgroids.h"
63
#include "utils/inval.h"
64
#include "utils/memutils.h"
B
Bruce Momjian 已提交
65
#include "utils/relcache.h"
66
#include "utils/resowner.h"
67
#include "utils/syscache.h"
68
#include "utils/typcache.h"
B
Bruce Momjian 已提交
69

70

71 72 73 74 75
/*
 * name of relcache init file, used to speed up backend startup
 */
#define RELCACHE_INIT_FILENAME	"pg_internal.init"

/* magic number identifying the init file format */
#define RELCACHE_INIT_FILEMAGIC		0x573264	/* version ID value */
77

78
/*
79
 *		hardcoded tuple descriptors.  see include/catalog/pg_attribute.h
80
 */
81 82 83 84
static FormData_pg_attribute Desc_pg_class[Natts_pg_class] = {Schema_pg_class};
static FormData_pg_attribute Desc_pg_attribute[Natts_pg_attribute] = {Schema_pg_attribute};
static FormData_pg_attribute Desc_pg_proc[Natts_pg_proc] = {Schema_pg_proc};
static FormData_pg_attribute Desc_pg_type[Natts_pg_type] = {Schema_pg_type};
85
static FormData_pg_attribute Desc_pg_index[Natts_pg_index] = {Schema_pg_index};
86

87
/*
88
 *		Hash tables that index the relation cache
89
 *
90 91
 *		We used to index the cache by both name and OID, but now there
 *		is only an index by OID.
92
 */
93 94 95 96 97 98
typedef struct relidcacheent
{
	Oid			reloid;
	Relation	reldesc;
} RelIdCacheEnt;

99
static HTAB *RelationIdCache;
100

101 102 103 104
/*
 * This flag is false until we have prepared the critical relcache entries
 * that are needed to do indexscans on the tables read by relcache building.
 */
B
Bruce Momjian 已提交
105
bool		criticalRelcachesBuilt = false;
106 107 108

/*
 * This counter counts relcache inval events received since backend startup
B
Bruce Momjian 已提交
109
 * (but only for rels that are actually in cache).	Presently, we use it only
110 111 112 113
 * to detect whether data about to be written by write_relcache_init_file()
 * might already be obsolete.
 */
static long relcacheInvalsReceived = 0L;
114

115 116 117 118 119
/*
 * This list remembers the OIDs of the relations cached in the relcache
 * init file.
 */
static List *initFileRelationIds = NIL;
120

121
/*
122
 * This flag lets us optimize away work in AtEO(Sub)Xact_RelationCache().
123
 */
124
static bool need_eoxact_work = false;
125

126

127
/*
 *		macros to manipulate the lookup hashtables
 */

/*
 * RelationCacheInsert
 *		Enter RELATION into RelationIdCache, keyed by its rd_id.  If an
 *		entry for that OID already exists, its reldesc pointer is simply
 *		overwritten.
 *
 * RELATION is parenthesized at each use so that any pointer-valued
 * expression can be passed safely; note it is evaluated more than once.
 */
#define RelationCacheInsert(RELATION)	\
do { \
	RelIdCacheEnt *idhentry; bool found; \
	idhentry = (RelIdCacheEnt*)hash_search(RelationIdCache, \
										   (void *) &((RELATION)->rd_id), \
										   HASH_ENTER, &found); \
	/* used to give notice if found -- now just keep quiet */ \
	idhentry->reldesc = (RELATION); \
} while(0)

140
/*
 * RelationIdCacheLookup
 *		Look up the relation OID held in ID (which must be an lvalue, since
 *		its address is taken) and assign the cached descriptor to RELATION,
 *		or NULL if no entry exists.
 */
#define RelationIdCacheLookup(ID, RELATION) \
do { \
	RelIdCacheEnt *hentry; \
	hentry = (RelIdCacheEnt*)hash_search(RelationIdCache, \
										 (void *) &(ID), \
										 HASH_FIND, NULL); \
	if (hentry) \
		(RELATION) = hentry->reldesc; \
	else \
		(RELATION) = NULL; \
} while(0)

/*
 * RelationCacheDelete
 *		Remove RELATION's entry (keyed by its rd_id) from RelationIdCache.
 *		A missing entry is reported with a WARNING rather than an error.
 */
#define RelationCacheDelete(RELATION) \
do { \
	RelIdCacheEnt *idhentry; \
	idhentry = (RelIdCacheEnt*)hash_search(RelationIdCache, \
										   (void *) &((RELATION)->rd_id), \
										   HASH_REMOVE, NULL); \
	if (idhentry == NULL) \
		elog(WARNING, "trying to delete a rd_id reldesc that does not exist"); \
} while(0)
161

162 163 164

/*
 * Special cache for opclass-related information
165
 *
166 167
 * Note: only default operators and support procs get cached, ie, those with
 * lefttype = righttype = opcintype.
168 169 170 171 172 173 174
 */
typedef struct opclasscacheent
{
	Oid			opclassoid;		/* lookup key: OID of opclass */
	bool		valid;			/* set TRUE after successful fill-in */
	StrategyNumber numStrats;	/* max # of strategies (from pg_am) */
	StrategyNumber numSupport;	/* max # of support procs (from pg_am) */
175 176
	Oid			opcfamily;		/* OID of opclass's family */
	Oid			opcintype;		/* OID of opclass's declared input type */
177
	Oid		   *operatorOids;	/* strategy operators' OIDs */
B
Bruce Momjian 已提交
178
	RegProcedure *supportProcs; /* support procs */
179 180 181 182 183
} OpClassCacheEnt;

static HTAB *OpClassCache = NULL;


184
/* non-export function prototypes */
185

186
static void RelationDestroyRelation(Relation relation);
187
static void RelationClearRelation(Relation relation, bool rebuild);
B
Bruce Momjian 已提交
188

189
static void RelationReloadIndexInfo(Relation relation);
190
static void RelationFlushRelation(Relation relation);
191 192
static bool load_relcache_init_file(void);
static void write_relcache_init_file(void);
B
Bruce Momjian 已提交
193
static void write_item(const void *data, Size len, FILE *fp);
194

195
static void formrdesc(const char *relationName, Oid relationReltype,
B
Bruce Momjian 已提交
196
		  bool hasoids, int natts, FormData_pg_attribute *att);
197

198
static HeapTuple ScanPgRelation(Oid targetRelId, bool indexOK);
199
static Relation AllocateRelationDesc(Form_pg_class relp);
200
static void RelationParseRelOptions(Relation relation, HeapTuple tuple);
201
static void RelationBuildTupleDesc(Relation relation);
202
static Relation RelationBuildDesc(Oid targetRelId, bool insertIt);
203
static void RelationInitPhysicalAddr(Relation relation);
204
static void load_critical_index(Oid indexoid, Oid heapoid);
205
static TupleDesc GetPgClassDescriptor(void);
206
static TupleDesc GetPgIndexDescriptor(void);
207
static void AttrDefaultFetch(Relation relation);
208
static void CheckConstraintFetch(Relation relation);
209
static List *insert_ordered_oid(List *list, Oid datum);
210
static void IndexSupportInitialize(oidvector *indclass,
B
Bruce Momjian 已提交
211 212
					   Oid *indexOperator,
					   RegProcedure *indexSupport,
213 214
					   Oid *opFamily,
					   Oid *opcInType,
B
Bruce Momjian 已提交
215 216 217
					   StrategyNumber maxStrategyNumber,
					   StrategyNumber maxSupportNumber,
					   AttrNumber maxAttributeNumber);
218
static OpClassCacheEnt *LookupOpclassInfo(Oid operatorClassOid,
B
Bruce Momjian 已提交
219 220
				  StrategyNumber numStrats,
				  StrategyNumber numSupport);
221

222

223
/*
224
 *		ScanPgRelation
225
 *
226 227 228 229 230
 *		This is used by RelationBuildDesc to find a pg_class
 *		tuple matching targetRelId.  The caller must hold at least
 *		AccessShareLock on the target relid to prevent concurrent-update
 *		scenarios --- else our SnapshotNow scan might fail to find any
 *		version that it thinks is live.
231 232 233
 *
 *		NB: the returned tuple has been copied into palloc'd storage
 *		and must eventually be freed with heap_freetuple.
234
 */
235
static HeapTuple
236
ScanPgRelation(Oid targetRelId, bool indexOK)
237
{
238 239
	HeapTuple	pg_class_tuple;
	Relation	pg_class_desc;
240
	SysScanDesc pg_class_scan;
241
	ScanKeyData key[1];
242

243
	/*
B
Bruce Momjian 已提交
244
	 * form a scan key
245
	 */
246 247 248 249
	ScanKeyInit(&key[0],
				ObjectIdAttributeNumber,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(targetRelId));
250

251
	/*
B
Bruce Momjian 已提交
252
	 * Open pg_class and fetch a tuple.  Force heap scan if we haven't yet
B
Bruce Momjian 已提交
253 254 255
	 * built the critical relcache entries (this includes initdb and startup
	 * without a pg_internal.init file).  The caller can also force a heap
	 * scan by setting indexOK == false.
256
	 */
257 258
	pg_class_desc = heap_open(RelationRelationId, AccessShareLock);
	pg_class_scan = systable_beginscan(pg_class_desc, ClassOidIndexId,
259
									   indexOK && criticalRelcachesBuilt,
260
									   SnapshotNow,
261
									   1, key);
262

263
	pg_class_tuple = systable_getnext(pg_class_scan);
B
Bruce Momjian 已提交
264

H
Hiroshi Inoue 已提交
265
	/*
266
	 * Must copy tuple before releasing buffer.
H
Hiroshi Inoue 已提交
267
	 */
268 269
	if (HeapTupleIsValid(pg_class_tuple))
		pg_class_tuple = heap_copytuple(pg_class_tuple);
270

271 272
	/* all done */
	systable_endscan(pg_class_scan);
273
	heap_close(pg_class_desc, AccessShareLock);
274

275
	return pg_class_tuple;
276 277
}

278
/*
279
 *		AllocateRelationDesc
280
 *
281
 *		This is used to allocate memory for a new relation descriptor
282
 *		and initialize the rd_rel field from the given pg_class tuple.
283
 */
284
static Relation
285
AllocateRelationDesc(Form_pg_class relp)
286
{
287
	Relation	relation;
288
	MemoryContext oldcxt;
289
	Form_pg_class relationForm;
290

291 292
	/* Relcache entries must live in CacheMemoryContext */
	oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
293

294
	/*
295
	 * allocate and zero space for new relation descriptor
296
	 */
297
	relation = (Relation) palloc0(sizeof(RelationData));
298

299
	/*
300
	 * clear fields of reldesc that should initialize to something non-zero
301
	 */
302
	relation->rd_targblock = InvalidBlockNumber;
303

304
	/* make sure relation is marked as having no open file yet */
305
	relation->rd_smgr = NULL;
306

307
	/*
B
Bruce Momjian 已提交
308
	 * Copy the relation tuple form
309
	 *
B
Bruce Momjian 已提交
310 311
	 * We only allocate space for the fixed fields, ie, CLASS_TUPLE_SIZE. The
	 * variable-length fields (relacl, reloptions) are NOT stored in the
312 313
	 * relcache --- there'd be little point in it, since we don't copy the
	 * tuple's nulls bitmap and hence wouldn't know if the values are valid.
B
Bruce Momjian 已提交
314 315 316 317
	 * Bottom line is that relacl *cannot* be retrieved from the relcache. Get
	 * it from the syscache if you need it.  The same goes for the original
	 * form of reloptions (however, we do store the parsed form of reloptions
	 * in rd_options).
318 319
	 */
	relationForm = (Form_pg_class) palloc(CLASS_TUPLE_SIZE);
320

321
	memcpy(relationForm, relp, CLASS_TUPLE_SIZE);
322 323

	/* initialize relation tuple form */
324
	relation->rd_rel = relationForm;
325

326
	/* and allocate attribute tuple form storage */
327 328
	relation->rd_att = CreateTemplateTupleDesc(relationForm->relnatts,
											   relationForm->relhasoids);
329 330
	/* which we mark as a reference-counted tupdesc */
	relation->rd_att->tdrefcount = 1;
331 332 333

	MemoryContextSwitchTo(oldcxt);

334
	return relation;
335 336
}

B
Bruce Momjian 已提交
337
/*
338 339 340 341 342 343
 * RelationParseRelOptions
 *		Convert pg_class.reloptions into pre-parsed rd_options
 *
 * tuple is the real pg_class tuple (not rd_rel!) for relation
 *
 * Note: rd_rel and (if an index) rd_am must be valid already
B
Bruce Momjian 已提交
344 345
 */
static void
346
RelationParseRelOptions(Relation relation, HeapTuple tuple)
B
Bruce Momjian 已提交
347
{
348 349 350
	Datum		datum;
	bool		isnull;
	bytea	   *options;
B
Bruce Momjian 已提交
351

352
	relation->rd_options = NULL;
B
Bruce Momjian 已提交
353

354
	/* Fall out if relkind should not have options */
B
Bruce Momjian 已提交
355 356
	switch (relation->rd_rel->relkind)
	{
357 358 359 360 361 362
		case RELKIND_RELATION:
		case RELKIND_TOASTVALUE:
		case RELKIND_INDEX:
			break;
		default:
			return;
B
Bruce Momjian 已提交
363 364
	}

365
	/*
B
Bruce Momjian 已提交
366 367 368
	 * Fetch reloptions from tuple; have to use a hardwired descriptor because
	 * we might not have any other for pg_class yet (consider executing this
	 * code for pg_class itself)
369 370 371 372 373 374 375
	 */
	datum = fastgetattr(tuple,
						Anum_pg_class_reloptions,
						GetPgClassDescriptor(),
						&isnull);
	if (isnull)
		return;
B
Bruce Momjian 已提交
376

377
	/* Parse into appropriate format; don't error out here */
B
Bruce Momjian 已提交
378 379
	switch (relation->rd_rel->relkind)
	{
380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400
		case RELKIND_RELATION:
		case RELKIND_TOASTVALUE:
			options = heap_reloptions(relation->rd_rel->relkind, datum,
									  false);
			break;
		case RELKIND_INDEX:
			options = index_reloptions(relation->rd_am->amoptions, datum,
									   false);
			break;
		default:
			Assert(false);		/* can't get here */
			options = NULL;		/* keep compiler quiet */
			break;
	}

	/* Copy parsed data into CacheMemoryContext */
	if (options)
	{
		relation->rd_options = MemoryContextAlloc(CacheMemoryContext,
												  VARSIZE(options));
		memcpy(relation->rd_options, options, VARSIZE(options));
401
		pfree(options);
B
Bruce Momjian 已提交
402 403 404
	}
}

405
/*
406
 *		RelationBuildTupleDesc
407
 *
408
 *		Form the relation's tuple descriptor from information in
409
 *		the pg_attribute, pg_attrdef & pg_constraint system catalogs.
410 411
 */
static void
412
RelationBuildTupleDesc(Relation relation)
413
{
414 415
	HeapTuple	pg_attribute_tuple;
	Relation	pg_attribute_desc;
416 417
	SysScanDesc pg_attribute_scan;
	ScanKeyData skey[2];
418
	int			need;
419
	TupleConstr *constr;
H
Hiroshi Inoue 已提交
420
	AttrDefault *attrdef = NULL;
421
	int			ndef = 0;
422

423 424 425 426
	/* copy some fields from pg_class row to rd_att */
	relation->rd_att->tdtypeid = relation->rd_rel->reltype;
	relation->rd_att->tdtypmod = -1;	/* unnecessary, but... */
	relation->rd_att->tdhasoid = relation->rd_rel->relhasoids;
427

428 429
	constr = (TupleConstr *) MemoryContextAlloc(CacheMemoryContext,
												sizeof(TupleConstr));
H
Hiroshi Inoue 已提交
430
	constr->has_not_null = false;
431

432
	/*
433
	 * Form a scan key that selects only user attributes (attnum > 0).
B
Bruce Momjian 已提交
434 435
	 * (Eliminating system attribute rows at the index level is lots faster
	 * than fetching them.)
436
	 */
437 438 439 440 441 442 443 444
	ScanKeyInit(&skey[0],
				Anum_pg_attribute_attrelid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(RelationGetRelid(relation)));
	ScanKeyInit(&skey[1],
				Anum_pg_attribute_attnum,
				BTGreaterStrategyNumber, F_INT2GT,
				Int16GetDatum(0));
445

446
	/*
B
Bruce Momjian 已提交
447 448 449
	 * Open pg_attribute and begin a scan.	Force heap scan if we haven't yet
	 * built the critical relcache entries (this includes initdb and startup
	 * without a pg_internal.init file).
450
	 */
451
	pg_attribute_desc = heap_open(AttributeRelationId, AccessShareLock);
452
	pg_attribute_scan = systable_beginscan(pg_attribute_desc,
453
										   AttributeRelidNumIndexId,
454 455 456
										   criticalRelcachesBuilt,
										   SnapshotNow,
										   2, skey);
457

458
	/*
B
Bruce Momjian 已提交
459
	 * add attribute data to relation->rd_att
460
	 */
461
	need = relation->rd_rel->relnatts;
462

463
	while (HeapTupleIsValid(pg_attribute_tuple = systable_getnext(pg_attribute_scan)))
464
	{
465 466
		Form_pg_attribute attp;

467
		attp = (Form_pg_attribute) GETSTRUCT(pg_attribute_tuple);
468

469 470
		if (attp->attnum <= 0 ||
			attp->attnum > relation->rd_rel->relnatts)
471
			elog(ERROR, "invalid attribute number %d for %s",
472 473
				 attp->attnum, RelationGetRelationName(relation));

474 475
		memcpy(relation->rd_att->attrs[attp->attnum - 1],
			   attp,
476
			   ATTRIBUTE_TUPLE_SIZE);
477

478 479
		/* Update constraint/default info */
		if (attp->attnotnull)
480
			constr->has_not_null = true;
H
Hiroshi Inoue 已提交
481

482 483 484 485
		if (attp->atthasdef)
		{
			if (attrdef == NULL)
				attrdef = (AttrDefault *)
486 487 488
					MemoryContextAllocZero(CacheMemoryContext,
										   relation->rd_rel->relnatts *
										   sizeof(AttrDefault));
489 490 491
			attrdef[ndef].adnum = attp->attnum;
			attrdef[ndef].adbin = NULL;
			ndef++;
492
		}
493 494 495
		need--;
		if (need == 0)
			break;
496
	}
497

498
	/*
B
Bruce Momjian 已提交
499
	 * end the scan and close the attribute relation
500
	 */
501
	systable_endscan(pg_attribute_scan);
502
	heap_close(pg_attribute_desc, AccessShareLock);
H
Hiroshi Inoue 已提交
503

504 505 506 507
	if (need != 0)
		elog(ERROR, "catalog is missing %d attribute(s) for relid %u",
			 need, RelationGetRelid(relation));

508
	/*
B
Bruce Momjian 已提交
509 510 511
	 * The attcacheoff values we read from pg_attribute should all be -1
	 * ("unknown").  Verify this if assert checking is on.	They will be
	 * computed when and if needed during tuple access.
512 513 514
	 */
#ifdef USE_ASSERT_CHECKING
	{
B
Bruce Momjian 已提交
515
		int			i;
516 517 518 519 520 521

		for (i = 0; i < relation->rd_rel->relnatts; i++)
			Assert(relation->rd_att->attrs[i]->attcacheoff == -1);
	}
#endif

522
	/*
B
Bruce Momjian 已提交
523
	 * However, we can easily set the attcacheoff value for the first
B
Bruce Momjian 已提交
524 525
	 * attribute: it must be zero.	This eliminates the need for special cases
	 * for attnum=1 that used to exist in fastgetattr() and index_getattr().
526
	 */
527 528
	if (relation->rd_rel->relnatts > 0)
		relation->rd_att->attrs[0]->attcacheoff = 0;
529

530 531 532 533
	/*
	 * Set up constraint/default info
	 */
	if (constr->has_not_null || ndef > 0 || relation->rd_rel->relchecks)
534
	{
535
		relation->rd_att->constr = constr;
536

537
		if (ndef > 0)			/* DEFAULTs */
538
		{
539 540 541 542 543 544 545
			if (ndef < relation->rd_rel->relnatts)
				constr->defval = (AttrDefault *)
					repalloc(attrdef, ndef * sizeof(AttrDefault));
			else
				constr->defval = attrdef;
			constr->num_defval = ndef;
			AttrDefaultFetch(relation);
546
		}
547 548
		else
			constr->num_defval = 0;
549

550
		if (relation->rd_rel->relchecks > 0)	/* CHECKs */
551
		{
552 553
			constr->num_check = relation->rd_rel->relchecks;
			constr->check = (ConstrCheck *)
554
				MemoryContextAllocZero(CacheMemoryContext,
B
Bruce Momjian 已提交
555
									constr->num_check * sizeof(ConstrCheck));
556
			CheckConstraintFetch(relation);
557
		}
558 559 560 561 562 563 564
		else
			constr->num_check = 0;
	}
	else
	{
		pfree(constr);
		relation->rd_att->constr = NULL;
565
	}
566 567
}

568
/*
 *		RelationBuildRuleLock
 *
 *		Form the relation's rewrite rules from information in
 *		the pg_rewrite system catalog.
 *
 *		On return, relation->rd_rulescxt is a new private memory context
 *		(child of CacheMemoryContext) and relation->rd_rules points to a
 *		RuleLock, allocated in that context, listing every rule found.
 *
 * Note: The rule parsetrees are potentially very complex node structures.
 * To allow these trees to be freed when the relcache entry is flushed,
 * we make a private memory context to hold the RuleLock information for
 * each relcache entry that has associated rules.  The context is used
 * just for rule info, not for any other subsidiary data of the relcache
 * entry, because that keeps the update logic in RelationClearRelation()
 * manageable.  The other subsidiary data structures are simple enough
 * to be easy to free explicitly, anyway.
 */
static void
RelationBuildRuleLock(Relation relation)
{
	MemoryContext rulescxt;
	MemoryContext oldcxt;
	HeapTuple	rewrite_tuple;
	Relation	rewrite_desc;
	TupleDesc	rewrite_tupdesc;
	SysScanDesc rewrite_scan;
	ScanKeyData key;
	RuleLock   *rulelock;
	int			numlocks;
	RewriteRule **rules;
	int			maxlocks;

	/*
	 * Make the private context.  Parameters are set on the assumption that
	 * it'll probably not contain much data.
	 */
	rulescxt = AllocSetContextCreate(CacheMemoryContext,
									 RelationGetRelationName(relation),
									 ALLOCSET_SMALL_MINSIZE,
									 ALLOCSET_SMALL_INITSIZE,
									 ALLOCSET_SMALL_MAXSIZE);
	relation->rd_rulescxt = rulescxt;

	/*
	 * allocate an array to hold the rewrite rules (the array is extended if
	 * necessary)
	 */
	maxlocks = 4;
	rules = (RewriteRule **)
		MemoryContextAlloc(rulescxt, sizeof(RewriteRule *) * maxlocks);
	numlocks = 0;

	/*
	 * form a scan key
	 */
	ScanKeyInit(&key,
				Anum_pg_rewrite_ev_class,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(RelationGetRelid(relation)));

	/*
	 * open pg_rewrite and begin a scan
	 *
	 * Note: since we scan the rules using RewriteRelRulenameIndexId, we will
	 * be reading the rules in name order, except possibly during
	 * emergency-recovery operations (ie, IgnoreSystemIndexes). This in turn
	 * ensures that rules will be fired in name order.
	 */
	rewrite_desc = heap_open(RewriteRelationId, AccessShareLock);
	rewrite_tupdesc = RelationGetDescr(rewrite_desc);
	rewrite_scan = systable_beginscan(rewrite_desc,
									  RewriteRelRulenameIndexId,
									  true, SnapshotNow,
									  1, &key);

	while (HeapTupleIsValid(rewrite_tuple = systable_getnext(rewrite_scan)))
	{
		Form_pg_rewrite rewrite_form = (Form_pg_rewrite) GETSTRUCT(rewrite_tuple);
		bool		isnull;
		Datum		rule_datum;
		text	   *rule_text;
		char	   *rule_str;
		RewriteRule *rule;

		rule = (RewriteRule *) MemoryContextAlloc(rulescxt,
												  sizeof(RewriteRule));

		rule->ruleId = HeapTupleGetOid(rewrite_tuple);

		/* ev_type is stored as a digit character; convert to its number */
		rule->event = rewrite_form->ev_type - '0';
		rule->attrno = rewrite_form->ev_attr;
		rule->enabled = rewrite_form->ev_enabled;
		rule->isInstead = rewrite_form->is_instead;

		/*
		 * Must use heap_getattr to fetch ev_action and ev_qual.  Also, the
		 * rule strings are often large enough to be toasted.  To avoid
		 * leaking memory in the caller's context, do the detoasting here so
		 * we can free the detoasted version.
		 */
		rule_datum = heap_getattr(rewrite_tuple,
								  Anum_pg_rewrite_ev_action,
								  rewrite_tupdesc,
								  &isnull);
		Assert(!isnull);
		rule_text = DatumGetTextP(rule_datum);
		rule_str = DatumGetCString(DirectFunctionCall1(textout,
												PointerGetDatum(rule_text)));
		/* build the node tree inside the rule context so it lives there */
		oldcxt = MemoryContextSwitchTo(rulescxt);
		rule->actions = (List *) stringToNode(rule_str);
		MemoryContextSwitchTo(oldcxt);
		pfree(rule_str);
		/* free the detoasted copy only if one was actually made */
		if ((Pointer) rule_text != DatumGetPointer(rule_datum))
			pfree(rule_text);

		rule_datum = heap_getattr(rewrite_tuple,
								  Anum_pg_rewrite_ev_qual,
								  rewrite_tupdesc,
								  &isnull);
		Assert(!isnull);
		rule_text = DatumGetTextP(rule_datum);
		rule_str = DatumGetCString(DirectFunctionCall1(textout,
												PointerGetDatum(rule_text)));
		oldcxt = MemoryContextSwitchTo(rulescxt);
		rule->qual = (Node *) stringToNode(rule_str);
		MemoryContextSwitchTo(oldcxt);
		pfree(rule_str);
		if ((Pointer) rule_text != DatumGetPointer(rule_datum))
			pfree(rule_text);

		/*
		 * We want the rule's table references to be checked as though by the
		 * table owner, not the user referencing the rule.  Therefore, scan
		 * through the rule's actions and set the checkAsUser field on all
		 * rtable entries.  We have to look at the qual as well, in case it
		 * contains sublinks.
		 *
		 * The reason for doing this when the rule is loaded, rather than when
		 * it is stored, is that otherwise ALTER TABLE OWNER would have to
		 * grovel through stored rules to update checkAsUser fields. Scanning
		 * the rule tree during load is relatively cheap (compared to
		 * constructing it in the first place), so we do it here.
		 */
		setRuleCheckAsUser((Node *) rule->actions, relation->rd_rel->relowner);
		setRuleCheckAsUser(rule->qual, relation->rd_rel->relowner);

		/* double the array whenever it is full */
		if (numlocks >= maxlocks)
		{
			maxlocks *= 2;
			rules = (RewriteRule **)
				repalloc(rules, sizeof(RewriteRule *) * maxlocks);
		}
		rules[numlocks++] = rule;
	}

	/*
	 * end the scan and close pg_rewrite
	 */
	systable_endscan(rewrite_scan);
	heap_close(rewrite_desc, AccessShareLock);

	/*
	 * form a RuleLock and insert into relation
	 */
	rulelock = (RuleLock *) MemoryContextAlloc(rulescxt, sizeof(RuleLock));
	rulelock->numLocks = numlocks;
	rulelock->rules = rules;

	relation->rd_rules = rulelock;
}

737
/*
 *		equalRuleLocks
 *
 *		Determine whether two RuleLocks are equivalent
 *
 *		Either argument may be NULL: two NULLs compare equal, and a NULL
 *		never equals a non-NULL RuleLock.  Otherwise the locks are equal
 *		when they hold the same number of rules and each pair of
 *		corresponding rules matches field by field.
 *
 *		Probably this should be in the rules code someplace...
 */
static bool
equalRuleLocks(RuleLock *rlock1, RuleLock *rlock2)
{
	int			i;

	/* handle the cases where one or both locks are absent */
	if (rlock1 == NULL)
		return (rlock2 == NULL);
	if (rlock2 == NULL)
		return false;

	if (rlock1->numLocks != rlock2->numLocks)
		return false;

	/*
	 * As of 7.3 we assume the rule ordering is repeatable, because
	 * RelationBuildRuleLock should read 'em in a consistent order.  So just
	 * compare corresponding slots.
	 */
	for (i = 0; i < rlock1->numLocks; i++)
	{
		RewriteRule *rule1 = rlock1->rules[i];
		RewriteRule *rule2 = rlock2->rules[i];

		if (rule1->ruleId != rule2->ruleId)
			return false;
		if (rule1->event != rule2->event)
			return false;
		if (rule1->attrno != rule2->attrno)
			return false;
		if (rule1->enabled != rule2->enabled)
			return false;
		if (rule1->isInstead != rule2->isInstead)
			return false;
		if (!equal(rule1->qual, rule2->qual))
			return false;
		if (!equal(rule1->actions, rule2->actions))
			return false;
	}

	return true;
}


787
/*
788 789
 *		RelationBuildDesc
 *
790
 *		Build a relation descriptor.  The caller must hold at least
791
 *		AccessShareLock on the target relid.
792
 *
793 794
 *		The new descriptor is inserted into the hash table if insertIt is true.
 *
795 796 797
 *		Returns NULL if no pg_class row could be found for the given relid
 *		(suggesting we are trying to access a just-deleted relation).
 *		Any other error is reported via elog.
798
 */
799
static Relation
800
RelationBuildDesc(Oid targetRelId, bool insertIt)
801
{
802 803
	Relation	relation;
	Oid			relid;
804
	HeapTuple	pg_class_tuple;
805
	Form_pg_class relp;
806

807
	/*
B
Bruce Momjian 已提交
808
	 * find the tuple in pg_class corresponding to the given relation id
809
	 */
810
	pg_class_tuple = ScanPgRelation(targetRelId, true);
811

812
	/*
B
Bruce Momjian 已提交
813
	 * if no such tuple exists, return NULL
814 815 816 817
	 */
	if (!HeapTupleIsValid(pg_class_tuple))
		return NULL;

818
	/*
B
Bruce Momjian 已提交
819
	 * get information from the pg_class_tuple
820
	 */
821
	relid = HeapTupleGetOid(pg_class_tuple);
822 823
	relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);

824
	/*
B
Bruce Momjian 已提交
825 826
	 * allocate storage for the relation descriptor, and copy pg_class_tuple
	 * to relation->rd_rel.
827
	 */
828
	relation = AllocateRelationDesc(relp);
829

830
	/*
B
Bruce Momjian 已提交
831
	 * initialize the relation's relation id (relation->rd_id)
832
	 */
833
	RelationGetRelid(relation) = relid;
834

835
	/*
B
Bruce Momjian 已提交
836 837 838
	 * normal relations are not nailed into the cache; nor can a pre-existing
	 * relation be new.  It could be temp though.  (Actually, it could be new
	 * too, but it's okay to forget that fact if forced to flush the entry.)
839
	 */
840
	relation->rd_refcnt = 0;
841
	relation->rd_isnailed = false;
842
	relation->rd_createSubid = InvalidSubTransactionId;
843
	relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
844
	relation->rd_istemp = isTempOrToastNamespace(relation->rd_rel->relnamespace);
845

846
	/*
B
Bruce Momjian 已提交
847
	 * initialize the tuple descriptor (relation->rd_att).
848
	 */
849
	RelationBuildTupleDesc(relation);
850

851
	/*
B
Bruce Momjian 已提交
852
	 * Fetch rules and triggers that affect this relation
853
	 */
854
	if (relation->rd_rel->relhasrules)
855 856
		RelationBuildRuleLock(relation);
	else
857
	{
858
		relation->rd_rules = NULL;
859 860
		relation->rd_rulescxt = NULL;
	}
861

862
	if (relation->rd_rel->reltriggers > 0)
863 864 865 866
		RelationBuildTriggers(relation);
	else
		relation->trigdesc = NULL;

867
	/*
868
	 * if it's an index, initialize index-related information
869
	 */
870
	if (OidIsValid(relation->rd_rel->relam))
871
		RelationInitIndexAccessInfo(relation);
872

873 874 875
	/* extract reloptions if any */
	RelationParseRelOptions(relation, pg_class_tuple);

876
	/*
B
Bruce Momjian 已提交
877
	 * initialize the relation lock manager information
878 879 880
	 */
	RelationInitLockInfo(relation);		/* see lmgr.c */

881 882 883 884
	/*
	 * initialize physical addressing information for the relation
	 */
	RelationInitPhysicalAddr(relation);
885

886
	/* make sure relation is marked as having no open file yet */
887
	relation->rd_smgr = NULL;
888

B
Bruce Momjian 已提交
889 890 891 892 893
	/*
	 * now we can free the memory allocated for pg_class_tuple
	 */
	heap_freetuple(pg_class_tuple);

894
	/*
895
	 * Insert newly created relation into relcache hash table, if requested.
896
	 */
897 898
	if (insertIt)
		RelationCacheInsert(relation);
899

900 901 902
	/* It's fully valid */
	relation->rd_isvalid = true;

903
	return relation;
904 905
}

906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922
/*
 * RelationInitPhysicalAddr
 *		Fill in relation->rd_node (the RelFileNode) from the entry's
 *		pg_class data.
 *
 * The tablespace falls back to MyDatabaseTableSpace when reltablespace is
 * zero; shared relations get InvalidOid as their database, all others get
 * MyDatabaseId; relNode is taken straight from relfilenode.
 */
static void
RelationInitPhysicalAddr(Relation relation)
{
	Form_pg_class classForm = relation->rd_rel;

	relation->rd_node.spcNode = classForm->reltablespace ?
		classForm->reltablespace : MyDatabaseTableSpace;

	relation->rd_node.dbNode = classForm->relisshared ?
		InvalidOid : MyDatabaseId;

	relation->rd_node.relNode = classForm->relfilenode;
}

923 924 925 926 927
/*
 * RelationInitIndexAccessInfo
 *
 * Initialize index-access-method support data for an index relation.
 *
 * On return the relcache entry has:
 *	- rd_indextuple / rd_index: a copy of the index's pg_index row,
 *	  made in CacheMemoryContext;
 *	- rd_am: a copy of the access method's pg_am row, allocated in
 *	  CacheMemoryContext;
 *	- rd_indexcxt: a private memory context (child of CacheMemoryContext)
 *	  holding rd_aminfo, rd_opfamily, rd_opcintype, rd_operator,
 *	  rd_support, rd_supportinfo and rd_indoption;
 *	- rd_indexprs, rd_indpred and rd_amcache reset to empty.
 *
 * Reports an error via elog(ERROR) if the pg_index or pg_am row cannot be
 * found in the syscache, or if relnatts disagrees with indnatts.
 */
void
RelationInitIndexAccessInfo(Relation relation)
{
	HeapTuple	tuple;
	Form_pg_am	aform;
	Datum		indclassDatum;
	Datum		indoptionDatum;
	bool		isnull;
	oidvector  *indclass;
	int2vector *indoption;
	MemoryContext indexcxt;
	MemoryContext oldcontext;
	int			natts;
	uint16		amstrategies;
	uint16		amsupport;

	/*
	 * Make a copy of the pg_index entry for the index.  Since pg_index
	 * contains variable-length and possibly-null fields, we have to do this
	 * honestly rather than just treating it as a Form_pg_index struct.
	 */
	tuple = SearchSysCache(INDEXRELID,
						   ObjectIdGetDatum(RelationGetRelid(relation)),
						   0, 0, 0);
	if (!HeapTupleIsValid(tuple))
		elog(ERROR, "cache lookup failed for index %u",
			 RelationGetRelid(relation));
	oldcontext = MemoryContextSwitchTo(CacheMemoryContext);
	relation->rd_indextuple = heap_copytuple(tuple);
	relation->rd_index = (Form_pg_index) GETSTRUCT(relation->rd_indextuple);
	MemoryContextSwitchTo(oldcontext);
	ReleaseSysCache(tuple);

	/*
	 * Make a copy of the pg_am entry for the index's access method
	 */
	tuple = SearchSysCache(AMOID,
						   ObjectIdGetDatum(relation->rd_rel->relam),
						   0, 0, 0);
	if (!HeapTupleIsValid(tuple))
		elog(ERROR, "cache lookup failed for access method %u",
			 relation->rd_rel->relam);
	aform = (Form_pg_am) MemoryContextAlloc(CacheMemoryContext, sizeof *aform);
	memcpy(aform, GETSTRUCT(tuple), sizeof *aform);
	ReleaseSysCache(tuple);
	relation->rd_am = aform;

	/* pg_class and pg_index must agree on the number of index columns */
	natts = relation->rd_rel->relnatts;
	if (natts != relation->rd_index->indnatts)
		elog(ERROR, "relnatts disagrees with indnatts for index %u",
			 RelationGetRelid(relation));
	amstrategies = aform->amstrategies;
	amsupport = aform->amsupport;

	/*
	 * Make the private context to hold index access info.  The reason we need
	 * a context, and not just a couple of pallocs, is so that we won't leak
	 * any subsidiary info attached to fmgr lookup records.
	 *
	 * Context parameters are set on the assumption that it'll probably not
	 * contain much data.
	 */
	indexcxt = AllocSetContextCreate(CacheMemoryContext,
									 RelationGetRelationName(relation),
									 ALLOCSET_SMALL_MINSIZE,
									 ALLOCSET_SMALL_INITSIZE,
									 ALLOCSET_SMALL_MAXSIZE);
	relation->rd_indexcxt = indexcxt;

	/*
	 * Allocate arrays to hold data
	 */
	relation->rd_aminfo = (RelationAmInfo *)
		MemoryContextAllocZero(indexcxt, sizeof(RelationAmInfo));

	relation->rd_opfamily = (Oid *)
		MemoryContextAllocZero(indexcxt, natts * sizeof(Oid));
	relation->rd_opcintype = (Oid *)
		MemoryContextAllocZero(indexcxt, natts * sizeof(Oid));

	/* one operator slot per (column, strategy) pair, if the AM has any */
	if (amstrategies > 0)
		relation->rd_operator = (Oid *)
			MemoryContextAllocZero(indexcxt,
								   natts * amstrategies * sizeof(Oid));
	else
		relation->rd_operator = NULL;

	/* one support-proc slot per (column, support number) pair, if any */
	if (amsupport > 0)
	{
		int			nsupport = natts * amsupport;

		relation->rd_support = (RegProcedure *)
			MemoryContextAllocZero(indexcxt, nsupport * sizeof(RegProcedure));
		relation->rd_supportinfo = (FmgrInfo *)
			MemoryContextAllocZero(indexcxt, nsupport * sizeof(FmgrInfo));
	}
	else
	{
		relation->rd_support = NULL;
		relation->rd_supportinfo = NULL;
	}

	relation->rd_indoption = (int16 *)
		MemoryContextAllocZero(indexcxt, natts * sizeof(int16));

	/*
	 * indclass cannot be referenced directly through the C struct, because it
	 * comes after the variable-width indkey field.  Must extract the datum
	 * the hard way...
	 */
	indclassDatum = fastgetattr(relation->rd_indextuple,
								Anum_pg_index_indclass,
								GetPgIndexDescriptor(),
								&isnull);
	Assert(!isnull);
	indclass = (oidvector *) DatumGetPointer(indclassDatum);

	/*
	 * Fill the operator and support procedure OID arrays, as well as the info
	 * about opfamilies and opclass input types.  (aminfo and supportinfo are
	 * left as zeroes, and are filled on-the-fly when used)
	 */
	IndexSupportInitialize(indclass,
						   relation->rd_operator, relation->rd_support,
						   relation->rd_opfamily, relation->rd_opcintype,
						   amstrategies, amsupport, natts);

	/*
	 * Similarly extract indoption and copy it to the cache entry
	 */
	indoptionDatum = fastgetattr(relation->rd_indextuple,
								 Anum_pg_index_indoption,
								 GetPgIndexDescriptor(),
								 &isnull);
	Assert(!isnull);
	indoption = (int2vector *) DatumGetPointer(indoptionDatum);
	memcpy(relation->rd_indoption, indoption->values, natts * sizeof(int16));

	/*
	 * expressions and predicate cache will be filled later
	 */
	relation->rd_indexprs = NIL;
	relation->rd_indpred = NIL;
	relation->rd_amcache = NULL;
}

1072
/*
1073
 * IndexSupportInitialize
1074
 *		Initializes an index's cached opclass information,
1075
 *		given the index's pg_index.indclass entry.
1076
 *
1077 1078
 * Data is returned into *indexOperator, *indexSupport, *opFamily, and
 * *opcInType, which are arrays allocated by the caller.
1079 1080 1081 1082 1083 1084 1085 1086
 *
 * The caller also passes maxStrategyNumber, maxSupportNumber, and
 * maxAttributeNumber, since these indicate the size of the arrays
 * it has allocated --- but in practice these numbers must always match
 * those obtainable from the system catalog entries for the index and
 * access method.
 */
static void
1087
IndexSupportInitialize(oidvector *indclass,
1088 1089
					   Oid *indexOperator,
					   RegProcedure *indexSupport,
1090 1091
					   Oid *opFamily,
					   Oid *opcInType,
1092 1093 1094 1095 1096 1097 1098 1099 1100 1101
					   StrategyNumber maxStrategyNumber,
					   StrategyNumber maxSupportNumber,
					   AttrNumber maxAttributeNumber)
{
	int			attIndex;

	for (attIndex = 0; attIndex < maxAttributeNumber; attIndex++)
	{
		OpClassCacheEnt *opcentry;

1102
		if (!OidIsValid(indclass->values[attIndex]))
1103
			elog(ERROR, "bogus pg_index tuple");
1104 1105

		/* look up the info for this opclass, using a cache */
1106
		opcentry = LookupOpclassInfo(indclass->values[attIndex],
1107 1108 1109
									 maxStrategyNumber,
									 maxSupportNumber);

1110
		/* copy cached data into relcache entry */
1111 1112
		opFamily[attIndex] = opcentry->opcfamily;
		opcInType[attIndex] = opcentry->opcintype;
1113
		if (maxStrategyNumber > 0)
1114 1115 1116
			memcpy(&indexOperator[attIndex * maxStrategyNumber],
				   opcentry->operatorOids,
				   maxStrategyNumber * sizeof(Oid));
1117
		if (maxSupportNumber > 0)
1118 1119 1120
			memcpy(&indexSupport[attIndex * maxSupportNumber],
				   opcentry->supportProcs,
				   maxSupportNumber * sizeof(RegProcedure));
1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135
	}
}

/*
 * LookupOpclassInfo
 *
 * This routine maintains a per-opclass cache of the information needed
 * by IndexSupportInitialize().  This is more efficient than relying on
 * the catalog cache, because we can load all the info about a particular
 * opclass in a single indexscan of pg_amproc or pg_amop.
 *
 * The information from pg_am about expected range of strategy and support
 * numbers is passed in, rather than being looked up, mainly because the
 * caller will have it already.
 *
 * Returns the cache entry for operatorClassOid, with opcfamily, opcintype,
 * operatorOids[0 .. numStrats-1] and supportProcs[0 .. numSupport-1] filled
 * in.  operatorOids is NULL when numStrats is zero, and supportProcs is NULL
 * when numSupport is zero.  Reports an error via elog(ERROR) if the
 * pg_opclass row is missing or a strategy/support number is out of range.
 *
 * Note there is no provision for flushing the cache.  This is OK at the
 * moment because there is no way to ALTER any interesting properties of an
 * existing opclass --- all you can do is drop it, which will result in
 * a useless but harmless dead entry in the cache.  To support altering
 * opclass membership (not the same as opfamily membership!), we'd need to
 * be able to flush this cache as well as the contents of relcache entries
 * for indexes.
 */
static OpClassCacheEnt *
LookupOpclassInfo(Oid operatorClassOid,
				  StrategyNumber numStrats,
				  StrategyNumber numSupport)
{
	OpClassCacheEnt *opcentry;
	bool		found;
	Relation	rel;
	SysScanDesc scan;
	ScanKeyData skey[3];
	HeapTuple	htup;
	bool		indexOK;

	if (OpClassCache == NULL)
	{
		/* First time through: initialize the opclass cache */
		HASHCTL		ctl;

		if (!CacheMemoryContext)
			CreateCacheMemoryContext();

		MemSet(&ctl, 0, sizeof(ctl));
		ctl.keysize = sizeof(Oid);
		ctl.entrysize = sizeof(OpClassCacheEnt);
		ctl.hash = oid_hash;
		OpClassCache = hash_create("Operator class cache", 64,
								   &ctl, HASH_ELEM | HASH_FUNCTION);
	}

	opcentry = (OpClassCacheEnt *) hash_search(OpClassCache,
											   (void *) &operatorClassOid,
											   HASH_ENTER, &found);

	if (!found)
	{
		/*
		 * Initialize the new entry.  The array pointers start out NULL and
		 * are allocated further down, so that if an allocation fails with an
		 * error the entry left behind in the hash table is still in a
		 * well-defined state for the next caller.
		 */
		opcentry->valid = false;	/* until known OK */
		opcentry->numStrats = numStrats;
		opcentry->numSupport = numSupport;
		opcentry->operatorOids = NULL;
		opcentry->supportProcs = NULL;
	}
	else
	{
		Assert(numStrats == opcentry->numStrats);
		Assert(numSupport == opcentry->numSupport);
	}

	/*
	 * When testing for cache-flush hazards, we intentionally disable the
	 * operator class cache and force reloading of the info on each call.
	 * This is helpful because we want to test the case where a cache flush
	 * occurs while we are loading the info, and it's very hard to provoke
	 * that if this happens only once per opclass per backend.
	 */
#if defined(CLOBBER_CACHE_ALWAYS)
	opcentry->valid = false;
#endif

	if (opcentry->valid)
		return opcentry;

	/*
	 * Need to fill in new entry.  First allocate the arrays, unless a
	 * previous (incomplete or forcibly invalidated) attempt already did so.
	 */
	if (opcentry->operatorOids == NULL && numStrats > 0)
		opcentry->operatorOids = (Oid *)
			MemoryContextAllocZero(CacheMemoryContext,
								   numStrats * sizeof(Oid));

	if (opcentry->supportProcs == NULL && numSupport > 0)
		opcentry->supportProcs = (RegProcedure *)
			MemoryContextAllocZero(CacheMemoryContext,
								   numSupport * sizeof(RegProcedure));

	/*
	 * To avoid infinite recursion during startup, force heap scans if we're
	 * looking up info for the opclasses used by the indexes we would like to
	 * reference here.
	 */
	indexOK = criticalRelcachesBuilt ||
		(operatorClassOid != OID_BTREE_OPS_OID &&
		 operatorClassOid != INT2_BTREE_OPS_OID);

	/*
	 * We have to fetch the pg_opclass row to determine its opfamily and
	 * opcintype, which are needed to look up the operators and functions.
	 * It'd be convenient to use the syscache here, but that probably doesn't
	 * work while bootstrapping.
	 */
	ScanKeyInit(&skey[0],
				ObjectIdAttributeNumber,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(operatorClassOid));
	rel = heap_open(OperatorClassRelationId, AccessShareLock);
	scan = systable_beginscan(rel, OpclassOidIndexId, indexOK,
							  SnapshotNow, 1, skey);

	if (HeapTupleIsValid(htup = systable_getnext(scan)))
	{
		Form_pg_opclass opclassform = (Form_pg_opclass) GETSTRUCT(htup);

		opcentry->opcfamily = opclassform->opcfamily;
		opcentry->opcintype = opclassform->opcintype;
	}
	else
		elog(ERROR, "could not find tuple for opclass %u", operatorClassOid);

	systable_endscan(scan);
	heap_close(rel, AccessShareLock);


	/*
	 * Scan pg_amop to obtain operators for the opclass.  We only fetch the
	 * default ones (those with lefttype = righttype = opcintype).
	 */
	if (numStrats > 0)
	{
		ScanKeyInit(&skey[0],
					Anum_pg_amop_amopfamily,
					BTEqualStrategyNumber, F_OIDEQ,
					ObjectIdGetDatum(opcentry->opcfamily));
		ScanKeyInit(&skey[1],
					Anum_pg_amop_amoplefttype,
					BTEqualStrategyNumber, F_OIDEQ,
					ObjectIdGetDatum(opcentry->opcintype));
		ScanKeyInit(&skey[2],
					Anum_pg_amop_amoprighttype,
					BTEqualStrategyNumber, F_OIDEQ,
					ObjectIdGetDatum(opcentry->opcintype));
		rel = heap_open(AccessMethodOperatorRelationId, AccessShareLock);
		scan = systable_beginscan(rel, AccessMethodStrategyIndexId, indexOK,
								  SnapshotNow, 3, skey);

		while (HeapTupleIsValid(htup = systable_getnext(scan)))
		{
			Form_pg_amop amopform = (Form_pg_amop) GETSTRUCT(htup);

			/* strategy numbers are 1-based; reject anything out of range */
			if (amopform->amopstrategy <= 0 ||
				(StrategyNumber) amopform->amopstrategy > numStrats)
				elog(ERROR, "invalid amopstrategy number %d for opclass %u",
					 amopform->amopstrategy, operatorClassOid);
			opcentry->operatorOids[amopform->amopstrategy - 1] =
				amopform->amopopr;
		}

		systable_endscan(scan);
		heap_close(rel, AccessShareLock);
	}

	/*
	 * Scan pg_amproc to obtain support procs for the opclass.  We only fetch
	 * the default ones (those with lefttype = righttype = opcintype).
	 */
	if (numSupport > 0)
	{
		ScanKeyInit(&skey[0],
					Anum_pg_amproc_amprocfamily,
					BTEqualStrategyNumber, F_OIDEQ,
					ObjectIdGetDatum(opcentry->opcfamily));
		ScanKeyInit(&skey[1],
					Anum_pg_amproc_amproclefttype,
					BTEqualStrategyNumber, F_OIDEQ,
					ObjectIdGetDatum(opcentry->opcintype));
		ScanKeyInit(&skey[2],
					Anum_pg_amproc_amprocrighttype,
					BTEqualStrategyNumber, F_OIDEQ,
					ObjectIdGetDatum(opcentry->opcintype));
		rel = heap_open(AccessMethodProcedureRelationId, AccessShareLock);
		scan = systable_beginscan(rel, AccessMethodProcedureIndexId, indexOK,
								  SnapshotNow, 3, skey);

		while (HeapTupleIsValid(htup = systable_getnext(scan)))
		{
			Form_pg_amproc amprocform = (Form_pg_amproc) GETSTRUCT(htup);

			/* support numbers are 1-based; reject anything out of range */
			if (amprocform->amprocnum <= 0 ||
				(StrategyNumber) amprocform->amprocnum > numSupport)
				elog(ERROR, "invalid amproc number %d for opclass %u",
					 amprocform->amprocnum, operatorClassOid);

			opcentry->supportProcs[amprocform->amprocnum - 1] =
				amprocform->amproc;
		}

		systable_endscan(scan);
		heap_close(rel, AccessShareLock);
	}

	opcentry->valid = true;
	return opcentry;
}


/*
 *		formrdesc
 *
 *		This is a special cut-down version of RelationBuildDesc()
1343
 *		used by RelationCacheInitializePhase2() in initializing the relcache.
1344
 *		The relation descriptor is built just from the supplied parameters,
1345 1346
 *		without actually looking at any system table entries.  We cheat
 *		quite a lot since we only need to work for a few basic system
1347 1348 1349
 *		catalogs.
 *
 * formrdesc is currently used for: pg_class, pg_attribute, pg_proc,
1350
 * and pg_type (see RelationCacheInitializePhase2).
1351
 *
1352 1353
 * Note that these catalogs can't have constraints (except attnotnull),
 * default values, rules, or triggers, since we don't cope with any of that.
1354
 *
1355
 * NOTE: we assume we are already switched into CacheMemoryContext.
1356 1357
 */
static void
1358 1359
formrdesc(const char *relationName, Oid relationReltype,
		  bool hasoids, int natts, FormData_pg_attribute *att)
1360
{
1361
	Relation	relation;
1362
	int			i;
1363
	bool		has_not_null;
1364

1365
	/*
1366
	 * allocate new relation desc, clear all fields of reldesc
1367
	 */
1368
	relation = (Relation) palloc0(sizeof(RelationData));
1369 1370 1371
	relation->rd_targblock = InvalidBlockNumber;

	/* make sure relation is marked as having no open file yet */
1372
	relation->rd_smgr = NULL;
1373

1374
	/*
1375
	 * initialize reference count: 1 because it is nailed in cache
1376
	 */
1377
	relation->rd_refcnt = 1;
1378

1379
	/*
B
Bruce Momjian 已提交
1380 1381
	 * all entries built with this routine are nailed-in-cache; none are for
	 * new or temp relations.
1382
	 */
1383
	relation->rd_isnailed = true;
1384
	relation->rd_createSubid = InvalidSubTransactionId;
1385
	relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
1386
	relation->rd_istemp = false;
1387

1388
	/*
B
Bruce Momjian 已提交
1389
	 * initialize relation tuple form
1390
	 *
1391 1392
	 * The data we insert here is pretty incomplete/bogus, but it'll serve to
	 * get us launched.  RelationCacheInitializePhase2() will read the real
1393 1394 1395
	 * data from pg_class and replace what we've done here.  Note in particular
	 * that relowner is left as zero; this cues RelationCacheInitializePhase2
	 * that the real data isn't there yet.
1396
	 */
1397
	relation->rd_rel = (Form_pg_class) palloc0(CLASS_TUPLE_SIZE);
1398

1399 1400
	namestrcpy(&relation->rd_rel->relname, relationName);
	relation->rd_rel->relnamespace = PG_CATALOG_NAMESPACE;
1401
	relation->rd_rel->reltype = relationReltype;
1402 1403

	/*
B
Bruce Momjian 已提交
1404 1405 1406
	 * It's important to distinguish between shared and non-shared relations,
	 * even at bootstrap time, to make sure we know where they are stored.	At
	 * present, all relations that formrdesc is used for are not shared.
1407
	 */
1408
	relation->rd_rel->relisshared = false;
1409

1410 1411
	relation->rd_rel->relpages = 1;
	relation->rd_rel->reltuples = 1;
1412
	relation->rd_rel->relkind = RELKIND_RELATION;
1413
	relation->rd_rel->relhasoids = hasoids;
1414
	relation->rd_rel->relnatts = (int16) natts;
1415

1416
	/*
B
Bruce Momjian 已提交
1417
	 * initialize attribute tuple form
1418
	 *
B
Bruce Momjian 已提交
1419
	 * Unlike the case with the relation tuple, this data had better be right
B
Bruce Momjian 已提交
1420 1421
	 * because it will never be replaced.  The input values must be correctly
	 * defined by macros in src/include/catalog/ headers.
1422
	 */
1423
	relation->rd_att = CreateTemplateTupleDesc(natts, hasoids);
1424 1425
	relation->rd_att->tdrefcount = 1;	/* mark as refcounted */

1426 1427
	relation->rd_att->tdtypeid = relationReltype;
	relation->rd_att->tdtypmod = -1;	/* unnecessary, but... */
1428

1429
	/*
B
Bruce Momjian 已提交
1430
	 * initialize tuple desc info
1431
	 */
1432
	has_not_null = false;
1433 1434
	for (i = 0; i < natts; i++)
	{
1435 1436
		memcpy(relation->rd_att->attrs[i],
			   &att[i],
1437
			   ATTRIBUTE_TUPLE_SIZE);
1438
		has_not_null |= att[i].attnotnull;
1439 1440
		/* make sure attcacheoff is valid */
		relation->rd_att->attrs[i]->attcacheoff = -1;
1441 1442
	}

1443 1444 1445
	/* initialize first attribute's attcacheoff, cf RelationBuildTupleDesc */
	relation->rd_att->attrs[0]->attcacheoff = 0;

1446 1447 1448 1449 1450 1451 1452 1453 1454
	/* mark not-null status */
	if (has_not_null)
	{
		TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));

		constr->has_not_null = true;
		relation->rd_att->constr = constr;
	}

1455
	/*
1456
	 * initialize relation id from info in att array (my, this is ugly)
1457
	 */
1458
	RelationGetRelid(relation) = relation->rd_att->attrs[0]->attrelid;
1459
	relation->rd_rel->relfilenode = RelationGetRelid(relation);
1460

1461
	/*
1462
	 * initialize the relation lock manager information
1463 1464 1465
	 */
	RelationInitLockInfo(relation);		/* see lmgr.c */

1466 1467 1468 1469
	/*
	 * initialize physical addressing information for the relation
	 */
	RelationInitPhysicalAddr(relation);
1470

1471
	/*
B
Bruce Momjian 已提交
1472
	 * initialize the rel-has-index flag, using hardwired knowledge
1473
	 */
1474 1475 1476 1477 1478 1479
	if (IsBootstrapProcessingMode())
	{
		/* In bootstrap mode, we have no indexes */
		relation->rd_rel->relhasindex = false;
	}
	else
1480
	{
1481 1482
		/* Otherwise, all the rels formrdesc is used for have indexes */
		relation->rd_rel->relhasindex = true;
1483 1484
	}

1485
	/*
B
Bruce Momjian 已提交
1486
	 * add new reldesc to relcache
1487
	 */
1488
	RelationCacheInsert(relation);
1489 1490 1491

	/* It's fully valid */
	relation->rd_isvalid = true;
1492 1493 1494 1495
}


/* ----------------------------------------------------------------
 *				 Relation Descriptor Lookup Interface
 * ----------------------------------------------------------------
 */

1500
/*
1501
 *		RelationIdGetRelation
1502
 *
1503
 *		Lookup a reldesc by OID; make one if not already in cache.
1504
 *
1505 1506 1507
 *		Returns NULL if no pg_class row could be found for the given relid
 *		(suggesting we are trying to access a just-deleted relation).
 *		Any other error is reported via elog.
1508
 *
1509 1510 1511 1512
 *		NB: caller should already have at least AccessShareLock on the
 *		relation ID, else there are nasty race conditions.
 *
 *		NB: relation ref count is incremented, or set to 1 if new entry.
1513 1514
 *		Caller should eventually decrement count.  (Usually,
 *		that happens by calling RelationClose().)
1515 1516
 */
Relation
1517
RelationIdGetRelation(Oid relationId)
1518
{
1519
	Relation	rd;
1520

1521 1522 1523
	/*
	 * first try to find reldesc in the cache
	 */
1524 1525 1526
	RelationIdCacheLookup(relationId, rd);

	if (RelationIsValid(rd))
1527
	{
1528
		RelationIncrementReferenceCount(rd);
1529
		/* revalidate cache entry if necessary */
1530
		if (!rd->rd_isvalid)
1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541
		{
			/*
			 * Indexes only have a limited number of possible schema changes,
			 * and we don't want to use the full-blown procedure because it's
			 * a headache for indexes that reload itself depends on.
			 */
			if (rd->rd_rel->relkind == RELKIND_INDEX)
				RelationReloadIndexInfo(rd);
			else
				RelationClearRelation(rd, true);
		}
1542
		return rd;
1543
	}
1544

1545
	/*
B
Bruce Momjian 已提交
1546 1547
	 * no reldesc in the cache, so have RelationBuildDesc() build one and add
	 * it.
1548
	 */
1549
	rd = RelationBuildDesc(relationId, true);
1550 1551
	if (RelationIsValid(rd))
		RelationIncrementReferenceCount(rd);
1552 1553 1554 1555
	return rd;
}

/* ----------------------------------------------------------------
 *				cache invalidation support routines
 * ----------------------------------------------------------------
 */

1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589
/*
 * RelationIncrementReferenceCount
 *		Bump rel->rd_refcnt by one and, outside bootstrap mode, record the
 *		reference with CurrentResourceOwner.
 *
 * Note: bootstrap mode has its own weird ideas about relation refcount
 * behavior; we ought to fix it someday, but for now, just disable
 * reference count ownership tracking in bootstrap mode.
 */
void
RelationIncrementReferenceCount(Relation rel)
{
	/* make room in the owner before touching the count */
	ResourceOwnerEnlargeRelationRefs(CurrentResourceOwner);
	rel->rd_refcnt++;

	/* no ownership tracking while bootstrapping */
	if (IsBootstrapProcessingMode())
		return;

	ResourceOwnerRememberRelationRef(CurrentResourceOwner, rel);
}

/*
 * RelationDecrementReferenceCount
 *		Drop rel->rd_refcnt by one and, outside bootstrap mode, tell
 *		CurrentResourceOwner to forget the reference.
 *
 * The count must be positive on entry (checked by assertion).
 */
void
RelationDecrementReferenceCount(Relation rel)
{
	Assert(rel->rd_refcnt > 0);
	rel->rd_refcnt--;

	/* no ownership tracking while bootstrapping */
	if (IsBootstrapProcessingMode())
		return;

	ResourceOwnerForgetRelationRef(CurrentResourceOwner, rel);
}

1590
/*
 * RelationClose - close an open relation
 *
 *	Actually, we just decrement the refcount.
 *
 *	NOTE: if compiled with -DRELCACHE_FORCE_RELEASE then relcache entries
 *	will be freed as soon as their refcount goes to zero.  In combination
 *	with aset.c's CLOBBER_FREED_MEMORY option, this provides a good test
 *	to catch references to already-released relcache entries.  It slows
 *	things down quite a bit, however.
 */
void
RelationClose(Relation relation)
{
	/* Note: no locking manipulations needed */
	RelationDecrementReferenceCount(relation);

#ifdef RELCACHE_FORCE_RELEASE
	/*
	 * Only discard entries that are unreferenced and that were neither
	 * created nor given a new relfilenode in a current (sub)transaction.
	 */
	if (RelationHasReferenceCountZero(relation) &&
		relation->rd_createSubid == InvalidSubTransactionId &&
		relation->rd_newRelfilenodeSubid == InvalidSubTransactionId)
		RelationClearRelation(relation, false);
#endif
}

1615
/*
1616
 * RelationReloadIndexInfo - reload minimal information for an open index
1617
 *
1618 1619 1620 1621 1622 1623 1624
 *	This function is used only for indexes.  A relcache inval on an index
 *	can mean that its pg_class or pg_index row changed.  There are only
 *	very limited changes that are allowed to an existing index's schema,
 *	so we can update the relcache entry without a complete rebuild; which
 *	is fortunate because we can't rebuild an index entry that is "nailed"
 *	and/or in active use.  We support full replacement of the pg_class row,
 *	as well as updates of a few simple fields of the pg_index row.
1625
 *
1626
 *	We can't necessarily reread the catalog rows right away; we might be
1627 1628
 *	in a failed transaction when we receive the SI notification.  If so,
 *	RelationClearRelation just marks the entry as invalid by setting
1629
 *	rd_isvalid to false.  This routine is called to fix the entry when it
1630
 *	is next needed.
1631 1632 1633 1634
 *
 *	We assume that at the time we are called, we have at least AccessShareLock
 *	on the target index.  (Note: in the calls from RelationClearRelation,
 *	this is legitimate because we know the rel has positive refcount.)
H
Hiroshi Inoue 已提交
1635 1636
 */
static void
1637
RelationReloadIndexInfo(Relation relation)
H
Hiroshi Inoue 已提交
1638
{
1639
	bool		indexOK;
H
Hiroshi Inoue 已提交
1640
	HeapTuple	pg_class_tuple;
B
Bruce Momjian 已提交
1641
	Form_pg_class relp;
H
Hiroshi Inoue 已提交
1642

1643 1644 1645 1646 1647
	/* Should be called only for invalidated indexes */
	Assert(relation->rd_rel->relkind == RELKIND_INDEX &&
		   !relation->rd_isvalid);
	/* Should be closed at smgr level */
	Assert(relation->rd_smgr == NULL);
B
Bruce Momjian 已提交
1648

1649
	/*
1650 1651
	 * Read the pg_class row
	 *
1652 1653
	 * Don't try to use an indexscan of pg_class_oid_index to reload the info
	 * for pg_class_oid_index ...
1654
	 */
1655 1656
	indexOK = (RelationGetRelid(relation) != ClassOidIndexId);
	pg_class_tuple = ScanPgRelation(RelationGetRelid(relation), indexOK);
H
Hiroshi Inoue 已提交
1657
	if (!HeapTupleIsValid(pg_class_tuple))
1658
		elog(ERROR, "could not find pg_class tuple for index %u",
1659
			 RelationGetRelid(relation));
H
Hiroshi Inoue 已提交
1660
	relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
1661
	memcpy(relation->rd_rel, relp, CLASS_TUPLE_SIZE);
1662
	/* Reload reloptions in case they changed */
B
Bruce Momjian 已提交
1663 1664
	if (relation->rd_options)
		pfree(relation->rd_options);
1665 1666
	RelationParseRelOptions(relation, pg_class_tuple);
	/* done with pg_class tuple */
H
Hiroshi Inoue 已提交
1667
	heap_freetuple(pg_class_tuple);
1668 1669 1670
	/* We must recalculate physical address in case it changed */
	RelationInitPhysicalAddr(relation);
	/* Make sure targblock is reset in case rel was truncated */
1671
	relation->rd_targblock = InvalidBlockNumber;
1672 1673 1674 1675
	/* Must free any AM cached data, too */
	if (relation->rd_amcache)
		pfree(relation->rd_amcache);
	relation->rd_amcache = NULL;
1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693

	/*
	 * For a non-system index, there are fields of the pg_index row that are
	 * allowed to change, so re-read that row and update the relcache entry.
	 * Most of the info derived from pg_index (such as support function lookup
	 * info) cannot change, and indeed the whole point of this routine is to
	 * update the relcache entry without clobbering that data; so wholesale
	 * replacement is not appropriate.
	 */
	if (!IsSystemRelation(relation))
	{
		HeapTuple	tuple;
		Form_pg_index index;

		tuple = SearchSysCache(INDEXRELID,
							   ObjectIdGetDatum(RelationGetRelid(relation)),
							   0, 0, 0);
		if (!HeapTupleIsValid(tuple))
B
Bruce Momjian 已提交
1694 1695
			elog(ERROR, "cache lookup failed for index %u",
				 RelationGetRelid(relation));
1696 1697 1698
		index = (Form_pg_index) GETSTRUCT(tuple);

		relation->rd_index->indisvalid = index->indisvalid;
1699 1700 1701 1702
		relation->rd_index->indcheckxmin = index->indcheckxmin;
		relation->rd_index->indisready = index->indisready;
		HeapTupleHeaderSetXmin(relation->rd_indextuple->t_data,
							   HeapTupleHeaderGetXmin(tuple->t_data));
1703 1704 1705 1706

		ReleaseSysCache(tuple);
	}

1707
	/* Okay, now it's valid again */
1708
	relation->rd_isvalid = true;
H
Hiroshi Inoue 已提交
1709
}
1710

1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754
/*
 * RelationDestroyRelation
 *
 *	Release a relcache entry together with everything hanging off it:
 *	the pg_class copy, the tuple descriptor (if this was its last
 *	reference), the index list and attribute bitmap, the trigger
 *	descriptor, reloptions, the pg_index and pg_am copies, and the index
 *	and rule memory contexts.
 *
 *	The entry must have zero refcount (checked by assertion), and the
 *	caller must already have unhooked it from the hash table.
 */
static void
RelationDestroyRelation(Relation relation)
{
	TupleDesc	tupdesc;

	Assert(RelationHasReferenceCountZero(relation));

	/*
	 * The caller has probably closed the relation's files at the smgr level
	 * already, but do it again here to be certain nothing is left open.
	 */
	RelationCloseSmgr(relation);

	/*
	 * Now release each subsidiary structure, and finally the entry itself.
	 */
	if (relation->rd_rel != NULL)
		pfree(relation->rd_rel);

	/* can't use DecrTupleDescRefCount here */
	tupdesc = relation->rd_att;
	Assert(tupdesc->tdrefcount > 0);
	tupdesc->tdrefcount -= 1;
	if (tupdesc->tdrefcount == 0)
		FreeTupleDesc(tupdesc);

	list_free(relation->rd_indexlist);
	bms_free(relation->rd_indexattr);
	FreeTriggerDesc(relation->trigdesc);

	if (relation->rd_options != NULL)
		pfree(relation->rd_options);
	if (relation->rd_indextuple != NULL)
		pfree(relation->rd_indextuple);
	if (relation->rd_am != NULL)
		pfree(relation->rd_am);

	if (relation->rd_indexcxt != NULL)
		MemoryContextDelete(relation->rd_indexcxt);
	if (relation->rd_rulescxt != NULL)
		MemoryContextDelete(relation->rd_rulescxt);

	pfree(relation);
}

1755
/*
1756
 * RelationClearRelation
1757
 *
1758 1759
 *	 Physically blow away a relation cache entry, or reset it and rebuild
 *	 it from scratch (that is, from catalog entries).  The latter path is
1760 1761
 *	 used when we are notified of a change to an open relation (one with
 *	 refcount > 0).
1762
 *
1763 1764 1765 1766 1767 1768 1769 1770 1771 1772
 *	 NB: when rebuilding, we'd better hold some lock on the relation,
 *	 else the catalog data we need to read could be changing under us.
 *	 Also, a rel to be rebuilt had better have refcnt > 0.  This is because
 *	 an sinval reset could happen while we're accessing the catalogs, and
 *	 the rel would get blown away underneath us by RelationCacheInvalidate
 *	 if it has zero refcnt.
 *
 *	 The "rebuild" parameter is redundant in current usage because it has
 *	 to match the relation's refcnt status, but we keep it as a crosscheck
 *	 that we're doing what the caller expects.
1773
 */
1774
static void
1775
RelationClearRelation(Relation relation, bool rebuild)
1776
{
1777
	Oid			old_reltype = relation->rd_rel->reltype;
1778

1779 1780 1781 1782 1783 1784 1785 1786
	/*
	 * As per notes above, a rel to be rebuilt MUST have refcnt > 0; while
	 * of course it would be a bad idea to blow away one with nonzero refcnt.
	 */
	Assert(rebuild ?
		   !RelationHasReferenceCountZero(relation) :
		   RelationHasReferenceCountZero(relation));

1787
	/*
1788
	 * Make sure smgr and lower levels close the relation's files, if they
B
Bruce Momjian 已提交
1789 1790 1791 1792
	 * weren't closed already.  If the relation is not getting deleted, the
	 * next smgr access should reopen the files automatically.	This ensures
	 * that the low-level file access state is updated after, say, a vacuum
	 * truncation.
1793
	 */
1794
	RelationCloseSmgr(relation);
1795

1796
	/*
B
Bruce Momjian 已提交
1797 1798 1799 1800
	 * Never, never ever blow away a nailed-in system relation, because we'd
	 * be unable to recover.  However, we must reset rd_targblock, in case we
	 * got called because of a relation cache flush that was triggered by
	 * VACUUM.
1801
	 *
1802 1803 1804
	 * If it's a nailed index, then we need to re-read the pg_class row to see
	 * if its relfilenode changed.	We can't necessarily do that here, because
	 * we might be in a failed transaction.  We assume it's okay to do it if
B
Bruce Momjian 已提交
1805 1806 1807
	 * there are open references to the relcache entry (cf notes for
	 * AtEOXact_RelationCache).  Otherwise just mark the entry as possibly
	 * invalid, and it'll be fixed when next opened.
1808 1809
	 */
	if (relation->rd_isnailed)
H
Hiroshi Inoue 已提交
1810
	{
1811
		relation->rd_targblock = InvalidBlockNumber;
1812 1813
		if (relation->rd_rel->relkind == RELKIND_INDEX)
		{
B
Bruce Momjian 已提交
1814
			relation->rd_isvalid = false;		/* needs to be revalidated */
1815
			if (relation->rd_refcnt > 1)
1816
				RelationReloadIndexInfo(relation);
1817
		}
1818
		return;
H
Hiroshi Inoue 已提交
1819
	}
1820

1821 1822 1823 1824
	/*
	 * Even non-system indexes should not be blown away if they are open and
	 * have valid index support information.  This avoids problems with active
	 * use of the index support information.  As with nailed indexes, we
B
Bruce Momjian 已提交
1825
	 * re-read the pg_class row to handle possible physical relocation of the
1826
	 * index, and we check for pg_index updates too.
1827 1828 1829 1830 1831
	 */
	if (relation->rd_rel->relkind == RELKIND_INDEX &&
		relation->rd_refcnt > 0 &&
		relation->rd_indexcxt != NULL)
	{
B
Bruce Momjian 已提交
1832
		relation->rd_isvalid = false;	/* needs to be revalidated */
1833
		RelationReloadIndexInfo(relation);
1834 1835 1836
		return;
	}

1837 1838
	/* Mark it invalid until we've finished rebuild */
	relation->rd_isvalid = false;
1839

1840
	/*
1841 1842
	 * Clear out catcache's entries for this relation.  This is a bit of
	 * a hack, but it's a convenient place to do it.
1843
	 */
1844
	CatalogCacheFlushRelation(RelationGetRelid(relation));
1845

1846
	/*
1847
	 * If we're really done with the relcache entry, blow it away. But if
B
Bruce Momjian 已提交
1848 1849 1850
	 * someone is still using it, reconstruct the whole deal without moving
	 * the physical RelationData record (so that the someone's pointer is
	 * still valid).
1851
	 */
1852
	if (!rebuild)
1853
	{
1854
		/* Flush any rowtype cache entry */
1855
		flush_rowtype_cache(old_reltype);
1856 1857 1858 1859 1860 1861

		/* Remove it from the hash table */
		RelationCacheDelete(relation);

		/* And release storage */
		RelationDestroyRelation(relation);
1862 1863 1864
	}
	else
	{
1865
		/*
1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888
		 * Our strategy for rebuilding an open relcache entry is to build
		 * a new entry from scratch, swap its contents with the old entry,
		 * and finally delete the new entry (along with any infrastructure
		 * swapped over from the old entry).  This is to avoid trouble in case
		 * an error causes us to lose control partway through.  The old entry
		 * will still be marked !rd_isvalid, so we'll try to rebuild it again
		 * on next access.  Meanwhile it's not any less valid than it was
		 * before, so any code that might expect to continue accessing it
		 * isn't hurt by the rebuild failure.  (Consider for example a
		 * subtransaction that ALTERs a table and then gets cancelled partway
		 * through the cache entry rebuild.  The outer transaction should
		 * still see the not-modified cache entry as valid.)  The worst
		 * consequence of an error is leaking the necessarily-unreferenced
		 * new entry, and this shouldn't happen often enough for that to be
		 * a big problem.
		 *
		 * When rebuilding an open relcache entry, we must preserve ref count
		 * and rd_createSubid/rd_newRelfilenodeSubid state.  Also attempt to
		 * preserve the pg_class entry (rd_rel), tupledesc, and rewrite-rule
		 * substructures in place, because various places assume that these
		 * structures won't move while they are working with an open relcache
		 * entry.  (Note: the refcount mechanism for tupledescs might someday
		 * allow us to remove this hack for the tupledesc.)
1889
		 *
1890 1891
		 * Note that this process does not touch CurrentResourceOwner; which
		 * is good because whatever ref counts the entry may have do not
B
Bruce Momjian 已提交
1892
		 * necessarily belong to that resource owner.
1893
		 */
1894
		Relation	newrel;
1895
		Oid			save_relid = RelationGetRelid(relation);
1896 1897 1898 1899 1900 1901
		bool		keep_tupdesc;
		bool		keep_rules;

		/* Build temporary entry, but don't link it into hashtable */
		newrel = RelationBuildDesc(save_relid, false);
		if (newrel == NULL)
1902
		{
1903
			/* Should only get here if relation was deleted */
1904
			flush_rowtype_cache(old_reltype);
1905 1906
			RelationCacheDelete(relation);
			RelationDestroyRelation(relation);
1907
			elog(ERROR, "relation %u deleted while still in use", save_relid);
1908
		}
1909

1910 1911 1912
		keep_tupdesc = equalTupleDescs(relation->rd_att, newrel->rd_att);
		keep_rules = equalRuleLocks(relation->rd_rules, newrel->rd_rules);
		if (!keep_tupdesc)
1913
			flush_rowtype_cache(old_reltype);
1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932

		/*
		 * Perform swapping of the relcache entry contents.  Within this
		 * process the old entry is momentarily invalid, so there *must*
		 * be no possibility of CHECK_FOR_INTERRUPTS within this sequence.
		 * Do it in all-in-line code for safety.
		 *
		 * Since the vast majority of fields should be swapped, our method
		 * is to swap the whole structures and then re-swap those few fields
		 * we didn't want swapped.
		 */
#define SWAPFIELD(fldtype, fldname) \
		do { \
			fldtype _tmp = newrel->fldname; \
			newrel->fldname = relation->fldname; \
			relation->fldname = _tmp; \
		} while (0)

		/* swap all Relation struct fields */
1933
		{
1934 1935 1936 1937 1938
			RelationData tmpstruct;

			memcpy(&tmpstruct, newrel, sizeof(RelationData));
			memcpy(newrel, relation, sizeof(RelationData));
			memcpy(relation, &tmpstruct, sizeof(RelationData));
1939
		}
1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957

		/* rd_smgr must not be swapped, due to back-links from smgr level */
		SWAPFIELD(SMgrRelation, rd_smgr);
		/* rd_refcnt must be preserved */
		SWAPFIELD(int, rd_refcnt);
		/* isnailed shouldn't change */
		Assert(newrel->rd_isnailed == relation->rd_isnailed);
		/* creation sub-XIDs must be preserved */
		SWAPFIELD(SubTransactionId, rd_createSubid);
		SWAPFIELD(SubTransactionId, rd_newRelfilenodeSubid);
		/* un-swap rd_rel pointers, swap contents instead */
		SWAPFIELD(Form_pg_class, rd_rel);
		/* ... but actually, we don't have to update newrel->rd_rel */
		memcpy(relation->rd_rel, newrel->rd_rel, CLASS_TUPLE_SIZE);
		/* preserve old tupledesc and rules if no logical change */
		if (keep_tupdesc)
			SWAPFIELD(TupleDesc, rd_att);
		if (keep_rules)
1958
		{
1959 1960
			SWAPFIELD(RuleLock *, rd_rules);
			SWAPFIELD(MemoryContext, rd_rulescxt);
1961
		}
1962 1963 1964 1965 1966 1967 1968
		/* pgstat_info must be preserved */
		SWAPFIELD(struct PgStat_TableStatus *, pgstat_info);

#undef SWAPFIELD

		/* And now we can throw away the temporary entry */
		RelationDestroyRelation(newrel);
1969
	}
1970 1971
}

1972
/*
1973 1974 1975 1976 1977
 * RelationFlushRelation
 *
 *	 Rebuild the relation if it is open (refcount > 0), else blow it away.
 */
static void
1978
RelationFlushRelation(Relation relation)
1979
{
1980 1981
	if (relation->rd_createSubid != InvalidSubTransactionId ||
		relation->rd_newRelfilenodeSubid != InvalidSubTransactionId)
1982 1983
	{
		/*
1984 1985
		 * New relcache entries are always rebuilt, not flushed; else we'd
		 * forget the "new" status of the relation, which is a useful
1986
		 * optimization to have.  Ditto for the new-relfilenode status.
1987 1988 1989 1990
		 *
		 * The rel could have zero refcnt here, so temporarily increment
		 * the refcnt to ensure it's safe to rebuild it.  We can assume that
		 * the current transaction has some lock on the rel already.
1991
		 */
1992 1993 1994
		RelationIncrementReferenceCount(relation);
		RelationClearRelation(relation, true);
		RelationDecrementReferenceCount(relation);
1995 1996 1997 1998
	}
	else
	{
		/*
1999
		 * Pre-existing rels can be dropped from the relcache if not open.
2000
		 */
2001
		bool	rebuild = !RelationHasReferenceCountZero(relation);
2002

2003 2004
		RelationClearRelation(relation, rebuild);
	}
2005 2006
}

2007
/*
2008
 * RelationForgetRelation - unconditionally remove a relcache entry
2009
 *
2010 2011
 *		   External interface for destroying a relcache entry when we
 *		   drop the relation.
2012 2013
 */
void
2014
RelationForgetRelation(Oid rid)
2015
{
2016
	Relation	relation;
2017 2018 2019

	RelationIdCacheLookup(rid, relation);

2020 2021 2022 2023
	if (!PointerIsValid(relation))
		return;					/* not in cache, nothing to do */

	if (!RelationHasReferenceCountZero(relation))
2024
		elog(ERROR, "relation %u is still open", rid);
2025 2026 2027

	/* Unconditionally destroy the relcache entry */
	RelationClearRelation(relation, false);
2028 2029
}

2030
/*
2031
 *		RelationCacheInvalidateEntry
2032 2033 2034
 *
 *		This routine is invoked for SI cache flush messages.
 *
2035 2036
 * Any relcache entry matching the relid must be flushed.  (Note: caller has
 * already determined that the relid belongs to our database or is a shared
2037
 * relation.)
2038 2039 2040 2041 2042 2043
 *
 * We used to skip local relations, on the grounds that they could
 * not be targets of cross-backend SI update messages; but it seems
 * safer to process them, so that our *own* SI update messages will
 * have the same effects during CommandCounterIncrement for both
 * local and nonlocal relations.
2044 2045
 */
void
2046
RelationCacheInvalidateEntry(Oid relationId)
2047
{
2048
	Relation	relation;
2049 2050 2051

	RelationIdCacheLookup(relationId, relation);

2052
	if (PointerIsValid(relation))
2053
	{
2054
		relcacheInvalsReceived++;
2055
		RelationFlushRelation(relation);
2056
	}
2057 2058 2059 2060
}

/*
 * RelationCacheInvalidate
2061
 *	 Blow away cached relation descriptors that have zero reference counts,
B
Bruce Momjian 已提交
2062
 *	 and rebuild those with positive reference counts.	Also reset the smgr
2063
 *	 relation cache.
2064
 *
2065
 *	 This is currently used only to recover from SI message buffer overflow,
2066
 *	 so we do not touch new-in-transaction relations; they cannot be targets
2067 2068
 *	 of cross-backend SI updates (and our own updates now go through a
 *	 separate linked list that isn't limited by the SI message buffer size).
2069 2070
 *	 Likewise, we need not discard new-relfilenode-in-transaction hints,
 *	 since any invalidation of those would be a local event.
2071 2072 2073
 *
 *	 We do this in two phases: the first pass deletes deletable items, and
 *	 the second one rebuilds the rebuildable items.  This is essential for
2074
 *	 safety, because hash_seq_search only copes with concurrent deletion of
B
Bruce Momjian 已提交
2075
 *	 the element it is currently visiting.	If a second SI overflow were to
2076 2077 2078 2079
 *	 occur while we are walking the table, resulting in recursive entry to
 *	 this routine, we could crash because the inner invocation blows away
 *	 the entry next to be visited by the outer scan.  But this way is OK,
 *	 because (a) during the first pass we won't process any more SI messages,
2080
 *	 so hash_seq_search will complete safely; (b) during the second pass we
2081
 *	 only hold onto pointers to nondeletable entries.
2082 2083 2084 2085 2086 2087
 *
 *	 The two-phase approach also makes it easy to ensure that we process
 *	 nailed-in-cache indexes before other nondeletable items, and that we
 *	 process pg_class_oid_index first of all.  In scenarios where a nailed
 *	 index has been given a new relfilenode, we have to detect that update
 *	 before the nailed index is used in reloading any other relcache entry.
2088 2089
 */
void
2090
RelationCacheInvalidate(void)
2091
{
2092
	HASH_SEQ_STATUS status;
2093
	RelIdCacheEnt *idhentry;
2094
	Relation	relation;
2095
	List	   *rebuildFirstList = NIL;
B
Bruce Momjian 已提交
2096
	List	   *rebuildList = NIL;
2097
	ListCell   *l;
2098 2099

	/* Phase 1 */
2100
	hash_seq_init(&status, RelationIdCache);
2101

2102
	while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
2103
	{
2104
		relation = idhentry->reldesc;
2105

2106
		/* Must close all smgr references to avoid leaving dangling ptrs */
2107
		RelationCloseSmgr(relation);
2108

2109
		/* Ignore new relations, since they are never SI targets */
2110
		if (relation->rd_createSubid != InvalidSubTransactionId)
2111
			continue;
2112

2113 2114
		relcacheInvalsReceived++;

2115
		if (RelationHasReferenceCountZero(relation))
2116 2117
		{
			/* Delete this entry immediately */
2118
			Assert(!relation->rd_isnailed);
2119 2120 2121 2122
			RelationClearRelation(relation, false);
		}
		else
		{
2123 2124
			/*
			 * Add this entry to list of stuff to rebuild in second pass.
B
Bruce Momjian 已提交
2125 2126
			 * pg_class_oid_index goes on the front of rebuildFirstList, other
			 * nailed indexes on the back, and everything else into
2127 2128 2129 2130 2131
			 * rebuildList (in no particular order).
			 */
			if (relation->rd_isnailed &&
				relation->rd_rel->relkind == RELKIND_INDEX)
			{
2132
				if (RelationGetRelid(relation) == ClassOidIndexId)
2133 2134 2135 2136 2137 2138
					rebuildFirstList = lcons(relation, rebuildFirstList);
				else
					rebuildFirstList = lappend(rebuildFirstList, relation);
			}
			else
				rebuildList = lcons(relation, rebuildList);
2139
		}
2140
	}
2141

2142
	/*
B
Bruce Momjian 已提交
2143 2144 2145
	 * Now zap any remaining smgr cache entries.  This must happen before we
	 * start to rebuild entries, since that may involve catalog fetches which
	 * will re-open catalog files.
2146 2147 2148
	 */
	smgrcloseall();

2149
	/* Phase 2: rebuild the items found to need rebuild in phase 1 */
2150 2151 2152 2153 2154 2155
	foreach(l, rebuildFirstList)
	{
		relation = (Relation) lfirst(l);
		RelationClearRelation(relation, true);
	}
	list_free(rebuildFirstList);
2156
	foreach(l, rebuildList)
2157
	{
2158 2159
		relation = (Relation) lfirst(l);
		RelationClearRelation(relation, true);
2160
	}
2161
	list_free(rebuildList);
2162
}
2163

2164
/*
2165
 * AtEOXact_RelationCache
2166
 *
2167
 *	Clean up the relcache at main-transaction commit or abort.
2168 2169 2170 2171 2172
 *
 * Note: this must be called *before* processing invalidation messages.
 * In the case of abort, we don't want to try to rebuild any invalidated
 * cache entries (since we can't safely do database accesses).  Therefore
 * we must reset refcnts before handling pending invalidations.
2173 2174 2175 2176 2177 2178
 *
 * As of PostgreSQL 8.1, relcache refcnts should get released by the
 * ResourceOwner mechanism.  This routine just does a debugging
 * cross-check that no pins remain.  However, we also need to do special
 * cleanup when the current transaction created any relations or made use
 * of forced index lists.
2179 2180
 */
void
2181
AtEOXact_RelationCache(bool isCommit)
2182
{
2183
	HASH_SEQ_STATUS status;
2184
	RelIdCacheEnt *idhentry;
2185

2186 2187
	/*
	 * To speed up transaction exit, we want to avoid scanning the relcache
B
Bruce Momjian 已提交
2188 2189 2190 2191
	 * unless there is actually something for this routine to do.  Other than
	 * the debug-only Assert checks, most transactions don't create any work
	 * for us to do here, so we keep a static flag that gets set if there is
	 * anything to do.	(Currently, this means either a relation is created in
2192
	 * the current xact, or one is given a new relfilenode, or an index list
B
Bruce Momjian 已提交
2193
	 * is forced.)	For simplicity, the flag remains set till end of top-level
2194 2195
	 * transaction, even though we could clear it at subtransaction end in
	 * some cases.
2196 2197 2198 2199 2200 2201 2202 2203
	 */
	if (!need_eoxact_work
#ifdef USE_ASSERT_CHECKING
		&& !assert_enabled
#endif
		)
		return;

2204
	hash_seq_init(&status, RelationIdCache);
2205

2206
	while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
2207
	{
2208
		Relation	relation = idhentry->reldesc;
2209 2210 2211 2212 2213

		/*
		 * The relcache entry's ref count should be back to its normal
		 * not-in-a-transaction state: 0 unless it's nailed in cache.
		 *
B
Bruce Momjian 已提交
2214 2215 2216
		 * In bootstrap mode, this is NOT true, so don't check it --- the
		 * bootstrap code expects relations to stay open across start/commit
		 * transaction calls.  (That seems bogus, but it's not worth fixing.)
2217 2218 2219 2220 2221 2222 2223 2224 2225 2226
		 */
#ifdef USE_ASSERT_CHECKING
		if (!IsBootstrapProcessingMode())
		{
			int			expected_refcnt;

			expected_refcnt = relation->rd_isnailed ? 1 : 0;
			Assert(relation->rd_refcnt == expected_refcnt);
		}
#endif
2227

2228 2229 2230
		/*
		 * Is it a relation created in the current transaction?
		 *
B
Bruce Momjian 已提交
2231 2232 2233 2234 2235 2236
		 * During commit, reset the flag to zero, since we are now out of the
		 * creating transaction.  During abort, simply delete the relcache
		 * entry --- it isn't interesting any longer.  (NOTE: if we have
		 * forgotten the new-ness of a new relation due to a forced cache
		 * flush, the entry will get deleted anyway by shared-cache-inval
		 * processing of the aborted pg_class insertion.)
2237
		 */
2238
		if (relation->rd_createSubid != InvalidSubTransactionId)
2239
		{
2240
			if (isCommit)
2241
				relation->rd_createSubid = InvalidSubTransactionId;
2242 2243 2244 2245 2246 2247
			else
			{
				RelationClearRelation(relation, false);
				continue;
			}
		}
2248 2249 2250 2251

		/*
		 * Likewise, reset the hint about the relfilenode being new.
		 */
2252
		relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
2253

2254 2255 2256 2257 2258
		/*
		 * Flush any temporary index list.
		 */
		if (relation->rd_indexvalid == 2)
		{
2259
			list_free(relation->rd_indexlist);
2260
			relation->rd_indexlist = NIL;
2261
			relation->rd_oidindex = InvalidOid;
2262 2263
			relation->rd_indexvalid = 0;
		}
2264
	}
2265

2266 2267
	/* Once done with the transaction, we can reset need_eoxact_work */
	need_eoxact_work = false;
2268
}
2269

2270 2271 2272 2273 2274 2275 2276 2277
/*
 * AtEOSubXact_RelationCache
 *
 *	Clean up the relcache at sub-transaction commit or abort.
 *
 * Note: this must be called *before* processing invalidation messages.
 */
void
2278 2279
AtEOSubXact_RelationCache(bool isCommit, SubTransactionId mySubid,
						  SubTransactionId parentSubid)
2280 2281 2282 2283
{
	HASH_SEQ_STATUS status;
	RelIdCacheEnt *idhentry;

2284
	/*
2285 2286
	 * Skip the relcache scan if nothing to do --- see notes for
	 * AtEOXact_RelationCache.
2287
	 */
2288
	if (!need_eoxact_work)
2289 2290
		return;

2291 2292 2293 2294 2295 2296 2297 2298 2299
	hash_seq_init(&status, RelationIdCache);

	while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
	{
		Relation	relation = idhentry->reldesc;

		/*
		 * Is it a relation created in the current subtransaction?
		 *
2300 2301
		 * During subcommit, mark it as belonging to the parent, instead.
		 * During subabort, simply delete the relcache entry.
2302
		 */
2303
		if (relation->rd_createSubid == mySubid)
2304 2305
		{
			if (isCommit)
2306
				relation->rd_createSubid = parentSubid;
2307 2308 2309 2310 2311 2312
			else
			{
				RelationClearRelation(relation, false);
				continue;
			}
		}
2313 2314

		/*
B
Bruce Momjian 已提交
2315 2316
		 * Likewise, update or drop any new-relfilenode-in-subtransaction
		 * hint.
2317
		 */
2318 2319 2320 2321 2322
		if (relation->rd_newRelfilenodeSubid == mySubid)
		{
			if (isCommit)
				relation->rd_newRelfilenodeSubid = parentSubid;
			else
2323
				relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
2324
		}
2325 2326 2327 2328 2329 2330 2331 2332

		/*
		 * Flush any temporary index list.
		 */
		if (relation->rd_indexvalid == 2)
		{
			list_free(relation->rd_indexlist);
			relation->rd_indexlist = NIL;
2333
			relation->rd_oidindex = InvalidOid;
2334 2335 2336 2337 2338
			relation->rd_indexvalid = 0;
		}
	}
}

2339 2340 2341 2342
/*
 * RelationCacheMarkNewRelfilenode
 *
 *	Record that the rel was assigned a new relfilenode in the current
 *	(sub) transaction.  This is only a hint; later operations on the rel
 *	within the same transaction may use it as an optimization.
 */
void
RelationCacheMarkNewRelfilenode(Relation rel)
{
	/* end-of-xact cleanup will be needed to clear the hint again */
	need_eoxact_work = true;

	/* remember which (sub) transaction assigned the relfilenode */
	rel->rd_newRelfilenodeSubid = GetCurrentSubTransactionId();
}


2356
/*
2357 2358 2359
 *		RelationBuildLocalRelation
 *			Build a relcache entry for an about-to-be-created relation,
 *			and enter it into the relcache.
2360
 */
2361 2362
Relation
RelationBuildLocalRelation(const char *relname,
2363
						   Oid relnamespace,
2364
						   TupleDesc tupDesc,
2365 2366
						   Oid relid,
						   Oid reltablespace,
2367
						   bool shared_relation)
2368
{
2369
	Relation	rel;
2370
	MemoryContext oldcxt;
2371 2372
	int			natts = tupDesc->natts;
	int			i;
2373
	bool		has_not_null;
2374
	bool		nailit;
2375

2376
	AssertArg(natts >= 0);
2377

2378 2379 2380
	/*
	 * check for creation of a rel that must be nailed in cache.
	 *
2381
	 * XXX this list had better match RelationCacheInitializePhase2's list.
2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395
	 */
	switch (relid)
	{
		case RelationRelationId:
		case AttributeRelationId:
		case ProcedureRelationId:
		case TypeRelationId:
			nailit = true;
			break;
		default:
			nailit = false;
			break;
	}

2396 2397
	/*
	 * check that hardwired list of shared rels matches what's in the
B
Bruce Momjian 已提交
2398 2399 2400
	 * bootstrap .bki file.  If you get a failure here during initdb, you
	 * probably need to fix IsSharedRelation() to match whatever you've done
	 * to the set of shared relations.
2401 2402 2403 2404 2405
	 */
	if (shared_relation != IsSharedRelation(relid))
		elog(ERROR, "shared_relation flag for \"%s\" does not match IsSharedRelation(%u)",
			 relname, relid);

2406 2407 2408 2409 2410
	/*
	 * switch to the cache context to create the relcache entry.
	 */
	if (!CacheMemoryContext)
		CreateCacheMemoryContext();
2411

2412 2413
	oldcxt = MemoryContextSwitchTo(CacheMemoryContext);

2414
	/*
2415
	 * allocate a new relation descriptor and fill in basic state fields.
2416
	 */
2417
	rel = (Relation) palloc0(sizeof(RelationData));
2418

2419 2420 2421
	rel->rd_targblock = InvalidBlockNumber;

	/* make sure relation is marked as having no open file yet */
2422
	rel->rd_smgr = NULL;
2423

2424 2425 2426
	/* mark it nailed if appropriate */
	rel->rd_isnailed = nailit;

2427
	rel->rd_refcnt = nailit ? 1 : 0;
2428

2429
	/* it's being created in this transaction */
2430
	rel->rd_createSubid = GetCurrentSubTransactionId();
2431
	rel->rd_newRelfilenodeSubid = InvalidSubTransactionId;
2432

2433
	/* must flag that we have rels created in this transaction */
2434
	need_eoxact_work = true;
2435

2436
	/* is it a temporary relation? */
2437
	rel->rd_istemp = isTempOrToastNamespace(relnamespace);
2438

2439
	/*
2440
	 * create a new tuple descriptor from the one passed in.  We do this
B
Bruce Momjian 已提交
2441 2442 2443 2444
	 * partly to copy it into the cache context, and partly because the new
	 * relation can't have any defaults or constraints yet; they have to be
	 * added in later steps, because they require additions to multiple system
	 * catalogs.  We can copy attnotnull constraints here, however.
2445
	 */
2446
	rel->rd_att = CreateTupleDescCopy(tupDesc);
2447
	rel->rd_att->tdrefcount = 1;	/* mark as refcounted */
2448
	has_not_null = false;
2449
	for (i = 0; i < natts; i++)
2450
	{
2451
		rel->rd_att->attrs[i]->attnotnull = tupDesc->attrs[i]->attnotnull;
2452 2453 2454 2455 2456 2457 2458 2459 2460 2461
		has_not_null |= tupDesc->attrs[i]->attnotnull;
	}

	if (has_not_null)
	{
		TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));

		constr->has_not_null = true;
		rel->rd_att->constr = constr;
	}
2462 2463 2464 2465

	/*
	 * initialize relation tuple form (caller may add/override data later)
	 */
2466
	rel->rd_rel = (Form_pg_class) palloc0(CLASS_TUPLE_SIZE);
2467

2468 2469
	namestrcpy(&rel->rd_rel->relname, relname);
	rel->rd_rel->relnamespace = relnamespace;
2470 2471

	rel->rd_rel->relkind = RELKIND_UNCATALOGED;
2472
	rel->rd_rel->relhasoids = rel->rd_att->tdhasoid;
2473 2474
	rel->rd_rel->relnatts = natts;
	rel->rd_rel->reltype = InvalidOid;
2475 2476
	/* needed when bootstrapping: */
	rel->rd_rel->relowner = BOOTSTRAP_SUPERUSERID;
2477 2478

	/*
B
Bruce Momjian 已提交
2479 2480 2481
	 * Insert relation physical and logical identifiers (OIDs) into the right
	 * places.	Note that the physical ID (relfilenode) is initially the same
	 * as the logical ID (OID).
2482
	 */
2483
	rel->rd_rel->relisshared = shared_relation;
2484 2485 2486 2487 2488 2489

	RelationGetRelid(rel) = relid;

	for (i = 0; i < natts; i++)
		rel->rd_att->attrs[i]->attrelid = relid;

2490 2491
	rel->rd_rel->relfilenode = relid;
	rel->rd_rel->reltablespace = reltablespace;
2492

2493
	RelationInitLockInfo(rel);	/* see lmgr.c */
2494

2495 2496
	RelationInitPhysicalAddr(rel);

2497 2498 2499 2500
	/*
	 * Okay to insert into the relcache hash tables.
	 */
	RelationCacheInsert(rel);
2501

2502 2503 2504
	/*
	 * done building relcache entry.
	 */
2505
	MemoryContextSwitchTo(oldcxt);
2506

2507 2508 2509
	/* It's fully valid */
	rel->rd_isvalid = true;

2510 2511 2512 2513 2514
	/*
	 * Caller expects us to pin the returned entry.
	 */
	RelationIncrementReferenceCount(rel);

2515
	return rel;
2516 2517
}

2518
/*
2519
 *		RelationCacheInitialize
2520
 *
2521 2522
 *		This initializes the relation descriptor cache.  At the time
 *		that this is invoked, we can't do database access yet (mainly
2523 2524 2525 2526 2527
 *		because the transaction subsystem is not up); all we are doing
 *		is making an empty cache hashtable.  This must be done before
 *		starting the initialization transaction, because otherwise
 *		AtEOXact_RelationCache would crash if that transaction aborts
 *		before we can get the relcache set up.
2528 2529
 */

2530
#define INITRELCACHESIZE		400
2531 2532

void
2533
RelationCacheInitialize(void)
2534
{
2535 2536
	MemoryContext oldcxt;
	HASHCTL		ctl;
2537

2538
	/*
B
Bruce Momjian 已提交
2539
	 * switch to cache memory context
2540
	 */
2541 2542
	if (!CacheMemoryContext)
		CreateCacheMemoryContext();
2543

2544
	oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
2545

2546
	/*
2547
	 * create hashtable that indexes the relcache
2548
	 */
2549
	MemSet(&ctl, 0, sizeof(ctl));
2550
	ctl.keysize = sizeof(Oid);
2551
	ctl.entrysize = sizeof(RelIdCacheEnt);
2552
	ctl.hash = oid_hash;
2553 2554
	RelationIdCache = hash_create("Relcache by OID", INITRELCACHESIZE,
								  &ctl, HASH_ELEM | HASH_FUNCTION);
2555

2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566
	MemoryContextSwitchTo(oldcxt);
}

/*
 *		RelationCacheInitializePhase2
 *
 *		This is called as soon as the catcache and transaction system
 *		are functional.  At this point we can actually read data from
 *		the system catalogs.  We first try to read pre-computed relcache
 *		entries from the pg_internal.init file.  If that's missing or
 *		broken, make phony entries for the minimum set of nailed-in-cache
B
Bruce Momjian 已提交
2567
 *		relations.	Then (unless bootstrapping) make sure we have entries
2568 2569 2570 2571 2572 2573 2574 2575 2576 2577
 *		for the critical system indexes.  Once we've done all this, we
 *		have enough infrastructure to open any system catalog or use any
 *		catcache.  The last step is to rewrite pg_internal.init if needed.
 */
void
RelationCacheInitializePhase2(void)
{
	HASH_SEQ_STATUS status;
	RelIdCacheEnt *idhentry;
	MemoryContext oldcxt;
B
Bruce Momjian 已提交
2578
	bool		needNewCacheFile = false;
2579

2580
	/*
2581 2582 2583 2584 2585 2586 2587 2588
	 * switch to cache memory context
	 */
	oldcxt = MemoryContextSwitchTo(CacheMemoryContext);

	/*
	 * Try to load the relcache cache file.  If unsuccessful, bootstrap the
	 * cache with pre-made descriptors for the critical "nailed-in" system
	 * catalogs.
2589
	 */
2590
	if (IsBootstrapProcessingMode() ||
B
Bruce Momjian 已提交
2591
		!load_relcache_init_file())
2592
	{
2593 2594
		needNewCacheFile = true;

2595
		formrdesc("pg_class", PG_CLASS_RELTYPE_OID,
2596
				  true, Natts_pg_class, Desc_pg_class);
2597
		formrdesc("pg_attribute", PG_ATTRIBUTE_RELTYPE_OID,
2598
				  false, Natts_pg_attribute, Desc_pg_attribute);
2599
		formrdesc("pg_proc", PG_PROC_RELTYPE_OID,
2600
				  true, Natts_pg_proc, Desc_pg_proc);
2601
		formrdesc("pg_type", PG_TYPE_RELTYPE_OID,
2602
				  true, Natts_pg_type, Desc_pg_type);
2603 2604 2605

#define NUM_CRITICAL_RELS	4	/* fix if you change list above */
	}
2606 2607

	MemoryContextSwitchTo(oldcxt);
2608

2609
	/* In bootstrap mode, the faked-up formrdesc info is all we'll have */
2610 2611 2612
	if (IsBootstrapProcessingMode())
		return;

2613
	/*
B
Bruce Momjian 已提交
2614
	 * If we didn't get the critical system indexes loaded into relcache, do
2615 2616
	 * so now.	These are critical because the catcache and/or opclass cache
	 * depend on them for fetches done during relcache load.  Thus, we have an
B
Bruce Momjian 已提交
2617 2618 2619 2620 2621 2622
	 * infinite-recursion problem.	We can break the recursion by doing
	 * heapscans instead of indexscans at certain key spots. To avoid hobbling
	 * performance, we only want to do that until we have the critical indexes
	 * loaded into relcache.  Thus, the flag criticalRelcachesBuilt is used to
	 * decide whether to do heapscan or indexscan at the key spots, and we set
	 * it true after we've loaded the critical indexes.
2623
	 *
B
Bruce Momjian 已提交
2624 2625 2626 2627 2628 2629
	 * The critical indexes are marked as "nailed in cache", partly to make it
	 * easy for load_relcache_init_file to count them, but mainly because we
	 * cannot flush and rebuild them once we've set criticalRelcachesBuilt to
	 * true.  (NOTE: perhaps it would be possible to reload them by
	 * temporarily setting criticalRelcachesBuilt to false again.  For now,
	 * though, we just nail 'em in.)
2630 2631 2632 2633
	 *
	 * RewriteRelRulenameIndexId and TriggerRelidNameIndexId are not critical
	 * in the same way as the others, because the critical catalogs don't
	 * (currently) have any rules or triggers, and so these indexes can be
B
Bruce Momjian 已提交
2634
	 * rebuilt without inducing recursion.	However they are used during
2635 2636
	 * relcache load when a rel does have rules or triggers, so we choose to
	 * nail them for performance reasons.
2637
	 */
B
Bruce Momjian 已提交
2638
	if (!criticalRelcachesBuilt)
2639
	{
2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657
		load_critical_index(ClassOidIndexId,
							RelationRelationId);
		load_critical_index(AttributeRelidNumIndexId,
							AttributeRelationId);
		load_critical_index(IndexRelidIndexId,
							IndexRelationId);
		load_critical_index(OpclassOidIndexId,
							OperatorClassRelationId);
		load_critical_index(AccessMethodStrategyIndexId,
							AccessMethodOperatorRelationId);
		load_critical_index(AccessMethodProcedureIndexId,
							AccessMethodProcedureRelationId);
		load_critical_index(OperatorOidIndexId,
							OperatorRelationId);
		load_critical_index(RewriteRelRulenameIndexId,
							RewriteRelationId);
		load_critical_index(TriggerRelidNameIndexId,
							TriggerRelationId);
2658

2659
#define NUM_CRITICAL_INDEXES	9		/* fix if you change list above */
2660 2661 2662 2663 2664

		criticalRelcachesBuilt = true;
	}

	/*
B
Bruce Momjian 已提交
2665 2666 2667 2668 2669 2670
	 * Now, scan all the relcache entries and update anything that might be
	 * wrong in the results from formrdesc or the relcache cache file. If we
	 * faked up relcache entries using formrdesc, then read the real pg_class
	 * rows and replace the fake entries with them. Also, if any of the
	 * relcache entries have rules or triggers, load that info the hard way
	 * since it isn't recorded in the cache file.
2671 2672 2673 2674 2675 2676 2677 2678
	 *
	 * Whenever we access the catalogs to read data, there is a possibility
	 * of a shared-inval cache flush causing relcache entries to be removed.
	 * Since hash_seq_search only guarantees to still work after the *current*
	 * entry is removed, it's unsafe to continue the hashtable scan afterward.
	 * We handle this by restarting the scan from scratch after each access.
	 * This is theoretically O(N^2), but the number of entries that actually
	 * need to be fixed is small enough that it doesn't matter.
2679
	 */
2680
	hash_seq_init(&status, RelationIdCache);
2681

2682
	while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
2683
	{
2684
		Relation	relation = idhentry->reldesc;
2685 2686 2687 2688 2689 2690
		bool		restart = false;

		/*
		 * Make sure *this* entry doesn't get flushed while we work with it.
		 */
		RelationIncrementReferenceCount(relation);
2691

2692
		/*
2693
		 * If it's a faked-up entry, read the real pg_class tuple.
2694
		 */
2695
		if (relation->rd_rel->relowner == InvalidOid)
2696 2697 2698
		{
			HeapTuple	htup;
			Form_pg_class relp;
B
Bruce Momjian 已提交
2699

2700
			htup = SearchSysCache(RELOID,
B
Bruce Momjian 已提交
2701
								ObjectIdGetDatum(RelationGetRelid(relation)),
2702 2703
								  0, 0, 0);
			if (!HeapTupleIsValid(htup))
2704 2705
				elog(FATAL, "cache lookup failed for relation %u",
					 RelationGetRelid(relation));
2706
			relp = (Form_pg_class) GETSTRUCT(htup);
B
Bruce Momjian 已提交
2707

2708 2709 2710 2711 2712
			/*
			 * Copy tuple to relation->rd_rel. (See notes in
			 * AllocateRelationDesc())
			 */
			memcpy((char *) relation->rd_rel, (char *) relp, CLASS_TUPLE_SIZE);
2713

2714 2715 2716 2717 2718
			/* Update rd_options while we have the tuple */
			if (relation->rd_options)
				pfree(relation->rd_options);
			RelationParseRelOptions(relation, htup);

2719
			/*
2720 2721 2722 2723
			 * Check the values in rd_att were set up correctly.  (We cannot
			 * just copy them over now: formrdesc must have set up the
			 * rd_att data correctly to start with, because it may already
			 * have been copied into one or more catcache entries.)
2724
			 */
2725 2726 2727
			Assert(relation->rd_att->tdtypeid == relp->reltype);
			Assert(relation->rd_att->tdtypmod == -1);
			Assert(relation->rd_att->tdhasoid == relp->relhasoids);
2728

2729
			ReleaseSysCache(htup);
2730 2731 2732 2733 2734 2735 2736

			/* relowner had better be OK now, else we'll loop forever */
			if (relation->rd_rel->relowner == InvalidOid)
				elog(ERROR, "invalid relowner in pg_class entry for \"%s\"",
					 RelationGetRelationName(relation));

			restart = true;
2737 2738 2739 2740
		}

		/*
		 * Fix data that isn't saved in relcache cache file.
2741 2742 2743 2744 2745
		 *
		 * relhasrules or reltriggers could possibly be wrong or out of
		 * date.  If we don't actually find any rules or triggers, clear the
		 * local copy of the flag so that we don't get into an infinite loop
		 * here.  We don't make any attempt to fix the pg_class entry, though.
2746 2747
		 */
		if (relation->rd_rel->relhasrules && relation->rd_rules == NULL)
2748
		{
2749
			RelationBuildRuleLock(relation);
2750 2751 2752 2753
			if (relation->rd_rules == NULL)
				relation->rd_rel->relhasrules = false;
			restart = true;
		}
2754
		if (relation->rd_rel->reltriggers > 0 && relation->trigdesc == NULL)
2755
		{
2756
			RelationBuildTriggers(relation);
2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770
			if (relation->trigdesc == NULL)
				relation->rd_rel->reltriggers = 0;
			restart = true;
		}

		/* Release hold on the relation */
		RelationDecrementReferenceCount(relation);

		/* Now, restart the hashtable scan if needed */
		if (restart)
		{
			hash_seq_term(&status);
			hash_seq_init(&status, RelationIdCache);
		}
2771
	}
2772

2773 2774 2775
	/*
	 * Lastly, write out a new relcache cache file if one is needed.
	 */
2776 2777 2778
	if (needNewCacheFile)
	{
		/*
B
Bruce Momjian 已提交
2779 2780 2781 2782
		 * Force all the catcaches to finish initializing and thereby open the
		 * catalogs and indexes they use.  This will preload the relcache with
		 * entries for all the most important system catalogs and indexes, so
		 * that the init file will be most useful for future backends.
2783 2784 2785 2786 2787 2788 2789 2790
		 */
		InitCatalogCachePhase2();

		/* now write the file */
		write_relcache_init_file();
	}
}

2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818
/*
 * load_critical_index
 *		Build the relcache entry for one critical system index and nail it.
 *
 * indexoid is the OID of the index to load; heapoid is the OID of the
 * catalog the index belongs to.  If RelationBuildDesc returns NULL we
 * elog(PANIC).
 */
static void
load_critical_index(Oid indexoid, Oid heapoid)
{
	Relation	indexrel;

	/*
	 * Lock order matters: catalog first, index second.  RelationBuildDesc
	 * might well need to read the catalog, and anyone else who
	 * exclusive-locks this catalog and index takes the locks in that same
	 * order, so doing it the other way around here would risk deadlock.
	 */
	LockRelationOid(heapoid, AccessShareLock);
	LockRelationOid(indexoid, AccessShareLock);

	indexrel = RelationBuildDesc(indexoid, true);
	if (indexrel == NULL)
		elog(PANIC, "could not open critical system index %u", indexoid);

	/* Nail the new entry in cache, holding one reference on it */
	indexrel->rd_refcnt = 1;
	indexrel->rd_isnailed = true;

	/* Drop the locks again, index before catalog */
	UnlockRelationOid(indexoid, AccessShareLock);
	UnlockRelationOid(heapoid, AccessShareLock);
}

2819
/*
2820
 * GetPgClassDescriptor -- get a predefined tuple descriptor for pg_class
2821 2822 2823
 * GetPgIndexDescriptor -- get a predefined tuple descriptor for pg_index
 *
 * We need this kluge because we have to be able to access non-fixed-width
2824 2825 2826 2827 2828 2829
 * fields of pg_class and pg_index before we have the standard catalog caches
 * available.  We use predefined data that's set up in just the same way as
 * the bootstrapped reldescs used by formrdesc().  The resulting tupdesc is
 * not 100% kosher: it does not have the correct rowtype OID in tdtypeid, nor
 * does it have a TupleConstr field.  But it's good enough for the purpose of
 * extracting fields.
2830 2831
 */
static TupleDesc
2832
BuildHardcodedDescriptor(int natts, Form_pg_attribute attrs, bool hasoids)
2833
{
2834
	TupleDesc	result;
2835 2836 2837 2838 2839
	MemoryContext oldcxt;
	int			i;

	oldcxt = MemoryContextSwitchTo(CacheMemoryContext);

2840
	result = CreateTemplateTupleDesc(natts, hasoids);
B
Bruce Momjian 已提交
2841
	result->tdtypeid = RECORDOID;		/* not right, but we don't care */
2842
	result->tdtypmod = -1;
2843

2844
	for (i = 0; i < natts; i++)
2845
	{
2846
		memcpy(result->attrs[i], &attrs[i], ATTRIBUTE_TUPLE_SIZE);
2847
		/* make sure attcacheoff is valid */
2848
		result->attrs[i]->attcacheoff = -1;
2849 2850 2851
	}

	/* initialize first attribute's attcacheoff, cf RelationBuildTupleDesc */
2852
	result->attrs[0]->attcacheoff = 0;
2853 2854 2855 2856 2857

	/* Note: we don't bother to set up a TupleConstr entry */

	MemoryContextSwitchTo(oldcxt);

2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885
	return result;
}

/*
 * GetPgClassDescriptor
 *		Return the hard-coded tuple descriptor for pg_class.
 *
 * The descriptor is built on first call (from Desc_pg_class, with
 * hasoids = true) and remembered in a function-local static thereafter.
 */
static TupleDesc
GetPgClassDescriptor(void)
{
	static TupleDesc pgclassdesc = NULL;

	/* Already built on an earlier call? */
	if (pgclassdesc != NULL)
		return pgclassdesc;

	pgclassdesc = BuildHardcodedDescriptor(Natts_pg_class,
										   Desc_pg_class,
										   true);
	return pgclassdesc;
}

/*
 * GetPgIndexDescriptor
 *		Return the hard-coded tuple descriptor for pg_index.
 *
 * The descriptor is built on first call (from Desc_pg_index, with
 * hasoids = false) and remembered in a function-local static thereafter.
 */
static TupleDesc
GetPgIndexDescriptor(void)
{
	static TupleDesc pgindexdesc = NULL;

	/* Already built on an earlier call? */
	if (pgindexdesc != NULL)
		return pgindexdesc;

	pgindexdesc = BuildHardcodedDescriptor(Natts_pg_index,
										   Desc_pg_index,
										   false);
	return pgindexdesc;
}

2889
static void
2890
AttrDefaultFetch(Relation relation)
2891
{
2892 2893 2894
	AttrDefault *attrdef = relation->rd_att->constr->defval;
	int			ndef = relation->rd_att->constr->num_defval;
	Relation	adrel;
2895
	SysScanDesc adscan;
2896
	ScanKeyData skey;
H
Hiroshi Inoue 已提交
2897
	HeapTuple	htup;
2898
	Datum		val;
2899 2900 2901
	bool		isnull;
	int			found;
	int			i;
2902

2903 2904 2905 2906
	ScanKeyInit(&skey,
				Anum_pg_attrdef_adrelid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(RelationGetRelid(relation)));
2907

2908 2909
	adrel = heap_open(AttrDefaultRelationId, AccessShareLock);
	adscan = systable_beginscan(adrel, AttrDefaultIndexId, true,
2910
								SnapshotNow, 1, &skey);
2911
	found = 0;
2912

2913
	while (HeapTupleIsValid(htup = systable_getnext(adscan)))
2914
	{
2915
		Form_pg_attrdef adform = (Form_pg_attrdef) GETSTRUCT(htup);
2916

2917 2918 2919 2920
		for (i = 0; i < ndef; i++)
		{
			if (adform->adnum != attrdef[i].adnum)
				continue;
2921
			if (attrdef[i].adbin != NULL)
2922
				elog(WARNING, "multiple attrdef records found for attr %s of rel %s",
B
Bruce Momjian 已提交
2923
				NameStr(relation->rd_att->attrs[adform->adnum - 1]->attname),
2924
					 RelationGetRelationName(relation));
2925 2926
			else
				found++;
2927

2928 2929 2930
			val = fastgetattr(htup,
							  Anum_pg_attrdef_adbin,
							  adrel->rd_att, &isnull);
2931
			if (isnull)
2932
				elog(WARNING, "null adbin for attr %s of rel %s",
B
Bruce Momjian 已提交
2933
				NameStr(relation->rd_att->attrs[adform->adnum - 1]->attname),
2934
					 RelationGetRelationName(relation));
2935 2936
			else
				attrdef[i].adbin = MemoryContextStrdup(CacheMemoryContext,
B
Bruce Momjian 已提交
2937 2938
								 DatumGetCString(DirectFunctionCall1(textout,
																	 val)));
2939 2940
			break;
		}
2941

2942
		if (i >= ndef)
2943 2944
			elog(WARNING, "unexpected attrdef record found for attr %d of rel %s",
				 adform->adnum, RelationGetRelationName(relation));
2945 2946
	}

2947
	systable_endscan(adscan);
2948
	heap_close(adrel, AccessShareLock);
2949 2950

	if (found != ndef)
2951
		elog(WARNING, "%d attrdef record(s) missing for rel %s",
2952
			 ndef - found, RelationGetRelationName(relation));
2953 2954
}

2955
static void
2956
CheckConstraintFetch(Relation relation)
2957
{
2958 2959
	ConstrCheck *check = relation->rd_att->constr->check;
	int			ncheck = relation->rd_att->constr->num_check;
2960 2961 2962
	Relation	conrel;
	SysScanDesc conscan;
	ScanKeyData skey[1];
H
Hiroshi Inoue 已提交
2963
	HeapTuple	htup;
2964
	Datum		val;
2965
	bool		isnull;
2966
	int			found = 0;
2967

2968 2969 2970 2971
	ScanKeyInit(&skey[0],
				Anum_pg_constraint_conrelid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(RelationGetRelid(relation)));
2972

2973 2974
	conrel = heap_open(ConstraintRelationId, AccessShareLock);
	conscan = systable_beginscan(conrel, ConstraintRelidIndexId, true,
2975
								 SnapshotNow, 1, skey);
2976

2977
	while (HeapTupleIsValid(htup = systable_getnext(conscan)))
2978
	{
2979 2980 2981 2982 2983 2984
		Form_pg_constraint conform = (Form_pg_constraint) GETSTRUCT(htup);

		/* We want check constraints only */
		if (conform->contype != CONSTRAINT_CHECK)
			continue;

2985 2986
		if (found >= ncheck)
			elog(ERROR, "unexpected constraint record found for rel %s",
2987
				 RelationGetRelationName(relation));
2988

2989
		check[found].ccname = MemoryContextStrdup(CacheMemoryContext,
B
Bruce Momjian 已提交
2990
												  NameStr(conform->conname));
2991 2992

		/* Grab and test conbin is actually set */
2993
		val = fastgetattr(htup,
2994 2995
						  Anum_pg_constraint_conbin,
						  conrel->rd_att, &isnull);
2996
		if (isnull)
2997
			elog(ERROR, "null conbin for rel %s",
2998
				 RelationGetRelationName(relation));
2999

3000
		check[found].ccbin = MemoryContextStrdup(CacheMemoryContext,
B
Bruce Momjian 已提交
3001 3002
								 DatumGetCString(DirectFunctionCall1(textout,
																	 val)));
3003 3004 3005
		found++;
	}

3006 3007
	systable_endscan(conscan);
	heap_close(conrel, AccessShareLock);
3008 3009

	if (found != ncheck)
3010
		elog(ERROR, "%d constraint record(s) missing for rel %s",
3011
			 ncheck - found, RelationGetRelationName(relation));
3012 3013
}

3014 3015 3016 3017 3018 3019
/*
 * RelationGetIndexList -- get a list of OIDs of indexes on this relation
 *
 * The list is computed lazily: on first request we scan pg_index for the
 * relevant rows and remember the result in the relcache entry, so later
 * requests need not repeat the scan.  A shared cache inval of the relcache
 * entry deletes the remembered list and resets rd_indexvalid to 0, forcing
 * a recomputation on the next request; that is how creation or deletion of
 * an index is noticed.
 *
 * The returned list is guaranteed to be sorted by OID.  The executor needs
 * this: for index types that are exclusive-locked during update, every
 * backend must lock the indexes in the same order or deadlocks will result
 * (see ExecOpenIndices()).  Any consistent ordering would serve; ordering
 * by OID is simply easy.
 *
 * Because shared cache inval can make the relcache's copy of the list go
 * away, the caller gets a copy palloc'd in its own context and may
 * list_free() it after use.  This is necessary since the caller will
 * typically do syscache lookups on the indexes, and a syscache lookup could
 * cause SI messages to be processed!
 *
 * rd_oidindex is maintained here too; this module treats it as effectively
 * part of the index list.  It is valid whenever rd_indexvalid is nonzero,
 * and holds the pg_class OID of a unique index on OID if the relation has
 * one, else InvalidOid.
 */
List *
RelationGetIndexList(Relation relation)
{
	Relation	pgindex;
	SysScanDesc scan;
	ScanKeyData key;
	HeapTuple	tuple;
	List	   *indexoids = NIL;
	Oid			oidindexoid = InvalidOid;
	MemoryContext savedcxt;

	/* Fast path: the list is already cached, so just hand out a copy. */
	if (relation->rd_indexvalid != 0)
		return list_copy(relation->rd_indexlist);

	/*
	 * The list to be returned is accumulated in the caller's context during
	 * the scan, and only copied into the relcache entry once the scan has
	 * finished successfully.  That way an error partway through cannot leak
	 * cache-context memory.
	 */

	/* Set up to scan pg_index for rows with indrelid = this rel. */
	ScanKeyInit(&key,
				Anum_pg_index_indrelid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(RelationGetRelid(relation)));

	pgindex = heap_open(IndexRelationId, AccessShareLock);
	scan = systable_beginscan(pgindex, IndexIndrelidIndexId, true,
							  SnapshotNow, 1, &key);

	for (tuple = systable_getnext(scan);
		 HeapTupleIsValid(tuple);
		 tuple = systable_getnext(scan))
	{
		Form_pg_index indexform = (Form_pg_index) GETSTRUCT(tuple);

		/* Slot the index's OID into the result at its sorted position */
		indexoids = insert_ordered_oid(indexoids, indexform->indexrelid);

		/*
		 * Is this a unique, non-partial btree index on OID?
		 *
		 * NOTE(review): the tests are deliberately kept in their original
		 * evaluation order.  indclass is read straight through the struct
		 * pointer, which presumably is only trustworthy once the
		 * single-column test has passed -- confirm against pg_index.h
		 * before reordering.
		 */
		if (indexform->indnatts == 1)
		{
			if (indexform->indisunique &&
				indexform->indkey.values[0] == ObjectIdAttributeNumber &&
				indexform->indclass.values[0] == OID_BTREE_OPS_OID &&
				heap_attisnull(tuple, Anum_pg_index_indpred))
				oidindexoid = indexform->indexrelid;
		}
	}

	systable_endscan(scan);
	heap_close(pgindex, AccessShareLock);

	/* Scan succeeded: stash a copy of the finished list in the relcache. */
	savedcxt = MemoryContextSwitchTo(CacheMemoryContext);
	relation->rd_indexlist = list_copy(indexoids);
	relation->rd_oidindex = oidindexoid;
	relation->rd_indexvalid = 1;
	MemoryContextSwitchTo(savedcxt);

	return indexoids;
}

3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115
/*
 * insert_ordered_oid
 *		Add an Oid to an ascending-sorted Oid list, keeping it sorted
 *
 * Returns the resulting list.  A datum equal to existing members is placed
 * after them.
 *
 * Constructing a sorted list by repeated insertion is O(N^2), but the
 * constant is pretty small, so for the list lengths we expect it is
 * probably faster than resorting to qsort().  Most tables don't have very
 * many indexes...
 */
static List *
insert_ordered_oid(List *list, Oid datum)
{
	ListCell   *after;
	ListCell   *next;

	/* Empty list, or smaller than the current head: goes at the front */
	if (list == NIL || datum < linitial_oid(list))
		return lcons_oid(datum, list);

	/*
	 * Otherwise walk forward until 'after' is the last cell whose successor
	 * is either absent or holds a value greater than datum.
	 */
	after = list_head(list);
	while ((next = lnext(after)) != NULL && datum >= lfirst_oid(next))
		after = next;

	/* Splice the new value in right behind 'after' */
	lappend_cell_oid(list, after, datum);

	return list;
}

3137 3138 3139 3140
/*
 * RelationSetIndexList -- externally force the index list contents
 *
 * This is used to temporarily override what we think the set of valid
3141 3142
 * indexes is (including the presence or absence of an OID index).
 * The forcing will be valid only until transaction commit or abort.
3143 3144 3145 3146 3147 3148
 *
 * This should only be applied to nailed relations, because in a non-nailed
 * relation the hacked index list could be lost at any time due to SI
 * messages.  In practice it is only used on pg_class (see REINDEX).
 *
 * It is up to the caller to make sure the given list is correctly ordered.
3149 3150 3151 3152 3153 3154 3155
 *
 * We deliberately do not change rd_indexattr here: even when operating
 * with a temporary partial index list, HOT-update decisions must be made
 * correctly with respect to the full index set.  It is up to the caller
 * to ensure that a correct rd_indexattr set has been cached before first
 * calling RelationSetIndexList; else a subsequent inquiry might cause a
 * wrong rd_indexattr set to get computed and cached.
3156 3157
 */
void
3158
RelationSetIndexList(Relation relation, List *indexIds, Oid oidIndex)
3159 3160 3161
{
	MemoryContext oldcxt;

3162
	Assert(relation->rd_isnailed);
3163 3164
	/* Copy the list into the cache context (could fail for lack of mem) */
	oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
3165
	indexIds = list_copy(indexIds);
3166 3167
	MemoryContextSwitchTo(oldcxt);
	/* Okay to replace old list */
3168
	list_free(relation->rd_indexlist);
3169
	relation->rd_indexlist = indexIds;
3170
	relation->rd_oidindex = oidIndex;
B
Bruce Momjian 已提交
3171
	relation->rd_indexvalid = 2;	/* mark list as forced */
3172
	/* must flag that we have a forced index list */
3173
	need_eoxact_work = true;
3174 3175
}

3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186
/*
 * RelationGetOidIndex -- get the pg_class OID of the relation's OID index
 *
 * The result is InvalidOid when the relation has no such index.
 */
Oid
RelationGetOidIndex(Relation relation)
{
	/*
	 * Asking about a relation that has no OIDs at all suggests a confused
	 * caller.  (Quietly returning InvalidOid would be possible, but an
	 * assertion failure seems the better response.)
	 */
	Assert(relation->rd_rel->relhasoids);

	/* rd_oidindex is already valid if the index list has been computed */
	if (relation->rd_indexvalid != 0)
		return relation->rd_oidindex;

	/*
	 * Otherwise let RelationGetIndexList do the heavy lifting; we only want
	 * its side effect on the relcache entry, not the list it returns.
	 */
	list_free(RelationGetIndexList(relation));
	Assert(relation->rd_indexvalid != 0);

	return relation->rd_oidindex;
}

3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231
/*
 * RelationGetIndexExpressions -- get the index expressions for an index
 *
 * The node tree obtained by transforming pg_index.indexprs is cached in the
 * relcache entry.  NIL is returned when the rel is not an index or has no
 * expressional columns.  Otherwise the caller receives a tree allocated in
 * its own memory context, never a pointer to the relcache copy, because the
 * latter could vanish due to relcache invalidation.
 */
List *
RelationGetIndexExpressions(Relation relation)
{
	Node	   *exprs;
	Datum		rawexprs;
	bool		isnull;
	char	   *exprstr;
	MemoryContext savedcxt;

	/* Already cached?  Then a copy of the cached tree is all we need. */
	if (relation->rd_indexprs)
		return (List *) copyObject(relation->rd_indexprs);

	/* Not an index, or an index without expressions: nothing to do. */
	if (relation->rd_indextuple == NULL ||
		heap_attisnull(relation->rd_indextuple, Anum_pg_index_indexprs))
		return NIL;

	/*
	 * The tree to be returned is constructed in the caller's context and is
	 * copied into the relcache entry only after all the work has succeeded,
	 * so an error partway through causes no trouble.
	 */
	rawexprs = heap_getattr(relation->rd_indextuple,
							Anum_pg_index_indexprs,
							GetPgIndexDescriptor(),
							&isnull);
	Assert(!isnull);

	exprstr = DatumGetCString(DirectFunctionCall1(textout, rawexprs));
	exprs = (Node *) stringToNode(exprstr);
	pfree(exprstr);

	/*
	 * Apply eval_const_expressions.  This is required, not merely an
	 * optimization: the planner compares these expressions against
	 * similarly-processed qual clauses and may miss valid matches
	 * otherwise.  canonicalize_qual is not bothered with, however.
	 */
	exprs = eval_const_expressions(NULL, exprs);

	/*
	 * Set coercion format fields to "don't care" as well, so the planner
	 * can match both explicit and implicit coercions.
	 */
	set_coercionform_dontcare(exprs);

	/* Fill in opfuncids while we're at it */
	fix_opfuncids(exprs);

	/* Finished: keep a copy of the tree in the relcache entry. */
	savedcxt = MemoryContextSwitchTo(relation->rd_indexcxt);
	relation->rd_indexprs = (List *) copyObject(exprs);
	MemoryContextSwitchTo(savedcxt);

	return (List *) exprs;
}

/*
 * RelationGetIndexPredicate -- get the index predicate for an index
 *
 * The implicit-AND node tree (suitable for ExecQual) obtained by
 * transforming pg_index.indpred is cached in the relcache entry.
 * NIL is returned when the rel is not an index or has no predicate.
 * Otherwise the caller receives a tree allocated in its own memory context,
 * never a pointer to the relcache copy, because the latter could vanish due
 * to relcache invalidation.
 */
List *
RelationGetIndexPredicate(Relation relation)
{
	Node	   *qual;
	List	   *predlist;
	Datum		rawpred;
	bool		isnull;
	char	   *predstr;
	MemoryContext savedcxt;

	/* Already cached?  Then a copy of the cached tree is all we need. */
	if (relation->rd_indpred)
		return (List *) copyObject(relation->rd_indpred);

	/* Not an index, or an index without a predicate: nothing to do. */
	if (relation->rd_indextuple == NULL ||
		heap_attisnull(relation->rd_indextuple, Anum_pg_index_indpred))
		return NIL;

	/*
	 * The tree to be returned is constructed in the caller's context and is
	 * copied into the relcache entry only after all the work has succeeded,
	 * so an error partway through causes no trouble.
	 */
	rawpred = heap_getattr(relation->rd_indextuple,
						   Anum_pg_index_indpred,
						   GetPgIndexDescriptor(),
						   &isnull);
	Assert(!isnull);

	predstr = DatumGetCString(DirectFunctionCall1(textout, rawpred));
	qual = (Node *) stringToNode(predstr);
	pfree(predstr);

	/*
	 * Const-simplify and canonicalize the expression.  This is required,
	 * not merely an optimization: the planner compares the predicate
	 * against similarly-processed qual clauses and may miss valid matches
	 * otherwise.  The processing must match what preprocess_expression()
	 * does to qual clauses!  (The subquery-related steps can be skipped,
	 * since index predicates are not allowed to contain subqueries.)
	 */
	qual = eval_const_expressions(NULL, qual);
	qual = (Node *) canonicalize_qual((Expr *) qual);

	/*
	 * Set coercion format fields to "don't care" as well, so the planner
	 * can match both explicit and implicit coercions.
	 */
	set_coercionform_dontcare(qual);

	/* Switch to implicit-AND list format */
	predlist = make_ands_implicit((Expr *) qual);

	/* Fill in opfuncids while we're at it */
	fix_opfuncids((Node *) predlist);

	/* Finished: keep a copy of the tree in the relcache entry. */
	savedcxt = MemoryContextSwitchTo(relation->rd_indexcxt);
	relation->rd_indpred = (List *) copyObject(predlist);
	MemoryContextSwitchTo(savedcxt);

	return predlist;
}

3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361
/*
 * RelationGetIndexAttrBitmap -- get a bitmap of index attribute numbers
 *
 * A bit is set in the result for every attribute that appears anywhere in
 * the definition of any index on this relation.  (That covers simple index
 * keys as well as attributes used in index expressions and partial-index
 * predicates.)
 *
 * Attribute numbers are stored offset by FirstLowInvalidHeapAttributeNumber
 * so that system attributes (e.g., OID) can be represented in the bitmap.
 *
 * The result is palloc'd in the caller's memory context; bms_free it when
 * it is no longer needed.
 */
Bitmapset *
RelationGetIndexAttrBitmap(Relation relation)
{
	Bitmapset  *attrs = NULL;
	List	   *indexoids;
	ListCell   *cell;
	MemoryContext savedcxt;

	/* Already cached?  Then a copy of the cached bitmap is all we need. */
	if (relation->rd_indexattr != NULL)
		return bms_copy(relation->rd_indexattr);

	/* Shortcut when the relation definitely has no indexes */
	if (!RelationGetForm(relation)->relhasindex)
		return NULL;

	/* Fetch the (cached) list of index OIDs */
	indexoids = RelationGetIndexList(relation);

	/* relhasindex was set, yet there are no indexes: nothing to collect */
	if (indexoids == NIL)
		return NULL;

	/* Visit each index and accumulate the attributes it references. */
	foreach(cell, indexoids)
	{
		Relation	indexrel = index_open(lfirst_oid(cell), AccessShareLock);
		IndexInfo  *info;
		int			keyno;

		/* Pull the index key information out of the index's pg_index row */
		info = BuildIndexInfo(indexrel);

		/* Simple attribute references first */
		for (keyno = 0; keyno < info->ii_NumIndexAttrs; keyno++)
		{
			int			attrnum = info->ii_KeyAttrNumbers[keyno];

			/*
			 * Zero entries contribute nothing here (presumably these are
			 * expression columns, whose attributes are gathered below).
			 */
			if (attrnum == 0)
				continue;

			attrs = bms_add_member(attrs,
								   attrnum - FirstLowInvalidHeapAttributeNumber);
		}

		/* Then every attribute used in the index expressions ... */
		pull_varattnos((Node *) info->ii_Expressions, &attrs);

		/* ... and every attribute used in the index predicate */
		pull_varattnos((Node *) info->ii_Predicate, &attrs);

		index_close(indexrel, AccessShareLock);
	}

	list_free(indexoids);

	/* Keep a copy of the finished bitmap in the relcache entry. */
	savedcxt = MemoryContextSwitchTo(CacheMemoryContext);
	relation->rd_indexattr = bms_copy(attrs);
	MemoryContextSwitchTo(savedcxt);

	/* The caller gets our original working copy to do with as it likes */
	return attrs;
}

3430

3431
/*
 *	load_relcache_init_file, write_relcache_init_file
 *
 *		In late 1992, we started regularly having databases with more than
 *		a thousand classes in them.  With this number of classes, it became
 *		critical to do indexed lookups on the system catalogs.
 *
 *		Bootstrapping these lookups is very hard.  We want to be able to
 *		use an index on pg_attribute, for example, but in order to do so,
 *		we must have read pg_attribute for the attributes in the index,
 *		which implies that we need to use the index.
 *
 *		In order to get around the problem, we do the following:
 *
 *		   +  When the database system is initialized (at initdb time), we
 *			  don't use indexes.  We do sequential scans.
 *
 *		   +  When the backend is started up in normal mode, we load an image
 *			  of the appropriate relation descriptors, in internal format,
 *			  from an initialization file in the data/base/... directory.
 *
 *		   +  If the initialization file isn't there, then we create the
 *			  relation descriptors using sequential scans and write 'em to
 *			  the initialization file for use by subsequent backends.
 *
 *		We could dispense with the initialization file and just build the
 *		critical reldescs the hard way on every backend startup, but that
 *		slows down backend startup noticeably.
 *
 *		We can in fact go further, and save more relcache entries than
 *		just the ones that are absolutely critical; this allows us to speed
 *		up backend startup by not having to build such entries the hard way.
 *		Presently, all the catalog and index entries that are referred to
 *		by catcaches are stored in the initialization file.
 *
 *		The same mechanism that detects when catcache and relcache entries
 *		need to be invalidated (due to catalog updates) also arranges to
 *		unlink the initialization file when its contents may be out of date.
 *		The file will then be rebuilt during the next backend startup.
 */

3472 3473 3474 3475
/*
 * load_relcache_init_file -- attempt to load cache from the init file
 *
 * If successful, return TRUE and set criticalRelcachesBuilt to true.
3476
 * If not successful, return FALSE.
3477 3478 3479 3480 3481
 *
 * NOTE: we assume we are already switched into CacheMemoryContext.
 */
static bool
load_relcache_init_file(void)
3482
{
3483 3484 3485 3486 3487 3488 3489
	FILE	   *fp;
	char		initfilename[MAXPGPATH];
	Relation   *rels;
	int			relno,
				num_rels,
				max_rels,
				nailed_rels,
3490 3491
				nailed_indexes,
				magic;
3492
	int			i;
3493

3494 3495 3496 3497 3498 3499
	snprintf(initfilename, sizeof(initfilename), "%s/%s",
			 DatabasePath, RELCACHE_INIT_FILENAME);

	fp = AllocateFile(initfilename, PG_BINARY_R);
	if (fp == NULL)
		return false;
3500

3501
	/*
B
Bruce Momjian 已提交
3502 3503 3504
	 * Read the index relcache entries from the file.  Note we will not enter
	 * any of them into the cache if the read fails partway through; this
	 * helps to guard against broken init files.
3505 3506 3507 3508 3509 3510 3511
	 */
	max_rels = 100;
	rels = (Relation *) palloc(max_rels * sizeof(Relation));
	num_rels = 0;
	nailed_rels = nailed_indexes = 0;
	initFileRelationIds = NIL;

3512 3513 3514 3515 3516 3517
	/* check for correct magic number (compatible version) */
	if (fread(&magic, 1, sizeof(magic), fp) != sizeof(magic))
		goto read_failed;
	if (magic != RELCACHE_INIT_FILEMAGIC)
		goto read_failed;

B
Bruce Momjian 已提交
3518
	for (relno = 0;; relno++)
3519
	{
3520 3521 3522 3523
		Size		len;
		size_t		nread;
		Relation	rel;
		Form_pg_class relform;
3524
		bool		has_not_null;
3525

3526
		/* first read the relation descriptor length */
3527 3528 3529 3530
		if ((nread = fread(&len, 1, sizeof(len), fp)) != sizeof(len))
		{
			if (nread == 0)
				break;			/* end of file */
3531
			goto read_failed;
3532
		}
3533

3534 3535
		/* safety check for incompatible relcache layout */
		if (len != sizeof(RelationData))
3536
			goto read_failed;
3537

3538 3539 3540 3541 3542 3543
		/* allocate another relcache header */
		if (num_rels >= max_rels)
		{
			max_rels *= 2;
			rels = (Relation *) repalloc(rels, max_rels * sizeof(Relation));
		}
3544

3545
		rel = rels[num_rels++] = (Relation) palloc(len);
3546

3547 3548
		/* then, read the Relation structure */
		if ((nread = fread(rel, 1, len, fp)) != len)
3549
			goto read_failed;
3550 3551

		/* next read the relation tuple form */
3552
		if ((nread = fread(&len, 1, sizeof(len), fp)) != sizeof(len))
3553
			goto read_failed;
3554 3555

		relform = (Form_pg_class) palloc(len);
3556
		if ((nread = fread(relform, 1, len, fp)) != len)
3557
			goto read_failed;
3558

3559
		rel->rd_rel = relform;
3560 3561

		/* initialize attribute tuple forms */
3562 3563
		rel->rd_att = CreateTemplateTupleDesc(relform->relnatts,
											  relform->relhasoids);
3564 3565
		rel->rd_att->tdrefcount = 1;	/* mark as refcounted */

3566
		rel->rd_att->tdtypeid = relform->reltype;
B
Bruce Momjian 已提交
3567
		rel->rd_att->tdtypmod = -1;		/* unnecessary, but... */
3568 3569

		/* next read all the attribute tuple form data entries */
3570
		has_not_null = false;
3571 3572
		for (i = 0; i < relform->relnatts; i++)
		{
3573
			if ((nread = fread(&len, 1, sizeof(len), fp)) != sizeof(len))
3574
				goto read_failed;
3575 3576
			if (len != ATTRIBUTE_TUPLE_SIZE)
				goto read_failed;
3577
			if ((nread = fread(rel->rd_att->attrs[i], 1, len, fp)) != len)
3578
				goto read_failed;
3579 3580 3581 3582

			has_not_null |= rel->rd_att->attrs[i]->attnotnull;
		}

B
Bruce Momjian 已提交
3583 3584 3585 3586 3587 3588 3589 3590
		/* next read the access method specific field */
		if ((nread = fread(&len, 1, sizeof(len), fp)) != sizeof(len))
			goto read_failed;
		if (len > 0)
		{
			rel->rd_options = palloc(len);
			if ((nread = fread(rel->rd_options, 1, len, fp)) != len)
				goto read_failed;
3591
			if (len != VARSIZE(rel->rd_options))
B
Bruce Momjian 已提交
3592
				goto read_failed;		/* sanity check */
B
Bruce Momjian 已提交
3593 3594 3595 3596 3597 3598
		}
		else
		{
			rel->rd_options = NULL;
		}

3599 3600 3601 3602 3603 3604 3605
		/* mark not-null status */
		if (has_not_null)
		{
			TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));

			constr->has_not_null = true;
			rel->rd_att->constr = constr;
3606 3607
		}

3608 3609 3610 3611 3612
		/* If it's an index, there's more to do */
		if (rel->rd_rel->relkind == RELKIND_INDEX)
		{
			Form_pg_am	am;
			MemoryContext indexcxt;
3613 3614
			Oid		   *opfamily;
			Oid		   *opcintype;
3615 3616
			Oid		   *operator;
			RegProcedure *support;
3617
			int			nsupport;
3618
			int16	   *indoption;
3619 3620 3621 3622 3623

			/* Count nailed indexes to ensure we have 'em all */
			if (rel->rd_isnailed)
				nailed_indexes++;

3624
			/* next, read the pg_index tuple */
3625 3626
			if ((nread = fread(&len, 1, sizeof(len), fp)) != sizeof(len))
				goto read_failed;
3627

3628 3629
			rel->rd_indextuple = (HeapTuple) palloc(len);
			if ((nread = fread(rel->rd_indextuple, 1, len, fp)) != len)
3630
				goto read_failed;
3631

3632 3633 3634 3635
			/* Fix up internal pointers in the tuple -- see heap_copytuple */
			rel->rd_indextuple->t_data = (HeapTupleHeader) ((char *) rel->rd_indextuple + HEAPTUPLESIZE);
			rel->rd_index = (Form_pg_index) GETSTRUCT(rel->rd_indextuple);

3636 3637 3638
			/* next, read the access method tuple form */
			if ((nread = fread(&len, 1, sizeof(len), fp)) != sizeof(len))
				goto read_failed;
3639

3640 3641 3642 3643
			am = (Form_pg_am) palloc(len);
			if ((nread = fread(am, 1, len, fp)) != len)
				goto read_failed;
			rel->rd_am = am;
3644

3645 3646 3647 3648 3649 3650
			/*
			 * prepare index info context --- parameters should match
			 * RelationInitIndexAccessInfo
			 */
			indexcxt = AllocSetContextCreate(CacheMemoryContext,
											 RelationGetRelationName(rel),
3651 3652 3653
											 ALLOCSET_SMALL_MINSIZE,
											 ALLOCSET_SMALL_INITSIZE,
											 ALLOCSET_SMALL_MAXSIZE);
3654 3655
			rel->rd_indexcxt = indexcxt;

3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675
			/* next, read the vector of opfamily OIDs */
			if ((nread = fread(&len, 1, sizeof(len), fp)) != sizeof(len))
				goto read_failed;

			opfamily = (Oid *) MemoryContextAlloc(indexcxt, len);
			if ((nread = fread(opfamily, 1, len, fp)) != len)
				goto read_failed;

			rel->rd_opfamily = opfamily;

			/* next, read the vector of opcintype OIDs */
			if ((nread = fread(&len, 1, sizeof(len), fp)) != sizeof(len))
				goto read_failed;

			opcintype = (Oid *) MemoryContextAlloc(indexcxt, len);
			if ((nread = fread(opcintype, 1, len, fp)) != len)
				goto read_failed;

			rel->rd_opcintype = opcintype;

3676 3677 3678 3679 3680 3681 3682 3683 3684
			/* next, read the vector of operator OIDs */
			if ((nread = fread(&len, 1, sizeof(len), fp)) != sizeof(len))
				goto read_failed;

			operator = (Oid *) MemoryContextAlloc(indexcxt, len);
			if ((nread = fread(operator, 1, len, fp)) != len)
				goto read_failed;

			rel->rd_operator = operator;
3685

3686
			/* next, read the vector of support procedures */
3687 3688 3689 3690 3691 3692 3693 3694
			if ((nread = fread(&len, 1, sizeof(len), fp)) != sizeof(len))
				goto read_failed;
			support = (RegProcedure *) MemoryContextAlloc(indexcxt, len);
			if ((nread = fread(support, 1, len, fp)) != len)
				goto read_failed;

			rel->rd_support = support;

3695 3696 3697 3698 3699 3700 3701 3702 3703 3704
			/* finally, read the vector of indoption values */
			if ((nread = fread(&len, 1, sizeof(len), fp)) != sizeof(len))
				goto read_failed;

			indoption = (int16 *) MemoryContextAlloc(indexcxt, len);
			if ((nread = fread(indoption, 1, len, fp)) != len)
				goto read_failed;

			rel->rd_indoption = indoption;

3705 3706 3707
			/* set up zeroed fmgr-info vectors */
			rel->rd_aminfo = (RelationAmInfo *)
				MemoryContextAllocZero(indexcxt, sizeof(RelationAmInfo));
3708 3709
			nsupport = relform->relnatts * am->amsupport;
			rel->rd_supportinfo = (FmgrInfo *)
3710
				MemoryContextAllocZero(indexcxt, nsupport * sizeof(FmgrInfo));
3711 3712 3713 3714 3715 3716 3717 3718
		}
		else
		{
			/* Count nailed rels to ensure we have 'em all */
			if (rel->rd_isnailed)
				nailed_rels++;

			Assert(rel->rd_index == NULL);
3719
			Assert(rel->rd_indextuple == NULL);
3720 3721
			Assert(rel->rd_am == NULL);
			Assert(rel->rd_indexcxt == NULL);
3722
			Assert(rel->rd_aminfo == NULL);
3723 3724
			Assert(rel->rd_opfamily == NULL);
			Assert(rel->rd_opcintype == NULL);
3725 3726 3727
			Assert(rel->rd_operator == NULL);
			Assert(rel->rd_support == NULL);
			Assert(rel->rd_supportinfo == NULL);
3728
			Assert(rel->rd_indoption == NULL);
3729 3730 3731 3732
		}

		/*
		 * Rules and triggers are not saved (mainly because the internal
B
Bruce Momjian 已提交
3733 3734
		 * format is complex and subject to change).  They must be rebuilt if
		 * needed by RelationCacheInitializePhase2.  This is not expected to
3735 3736
		 * be a big performance hit since few system catalogs have such. Ditto
		 * for index expressions and predicates.
3737 3738 3739 3740
		 */
		rel->rd_rules = NULL;
		rel->rd_rulescxt = NULL;
		rel->trigdesc = NULL;
3741 3742
		rel->rd_indexprs = NIL;
		rel->rd_indpred = NIL;
3743 3744 3745 3746

		/*
		 * Reset transient-state fields in the relcache entry
		 */
3747
		rel->rd_smgr = NULL;
3748 3749
		rel->rd_targblock = InvalidBlockNumber;
		if (rel->rd_isnailed)
3750
			rel->rd_refcnt = 1;
3751
		else
3752
			rel->rd_refcnt = 0;
3753
		rel->rd_indexvalid = 0;
3754
		rel->rd_indexlist = NIL;
3755
		rel->rd_indexattr = NULL;
3756
		rel->rd_oidindex = InvalidOid;
3757
		rel->rd_createSubid = InvalidSubTransactionId;
3758
		rel->rd_newRelfilenodeSubid = InvalidSubTransactionId;
3759
		rel->rd_amcache = NULL;
3760
		MemSet(&rel->pgstat_info, 0, sizeof(rel->pgstat_info));
3761

3762
		/*
3763
		 * Recompute lock and physical addressing info.  This is needed in
B
Bruce Momjian 已提交
3764 3765
		 * case the pg_internal.init file was copied from some other database
		 * by CREATE DATABASE.
3766 3767
		 */
		RelationInitLockInfo(rel);
3768
		RelationInitPhysicalAddr(rel);
3769 3770 3771
	}

	/*
B
Bruce Momjian 已提交
3772 3773 3774
	 * We reached the end of the init file without apparent problem. Did we
	 * get the right number of nailed items?  (This is a useful crosscheck in
	 * case the set of critical rels or indexes changes.)
3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788
	 */
	if (nailed_rels != NUM_CRITICAL_RELS ||
		nailed_indexes != NUM_CRITICAL_INDEXES)
		goto read_failed;

	/*
	 * OK, all appears well.
	 *
	 * Now insert all the new relcache entries into the cache.
	 */
	for (relno = 0; relno < num_rels; relno++)
	{
		RelationCacheInsert(rels[relno]);
		/* also make a list of their OIDs, for RelationIdIsInInitFile */
3789
		initFileRelationIds = lcons_oid(RelationGetRelid(rels[relno]),
B
Bruce Momjian 已提交
3790
										initFileRelationIds);
3791
	}
3792

3793 3794 3795
	pfree(rels);
	FreeFile(fp);

3796
	criticalRelcachesBuilt = true;
3797
	return true;
3798

3799
	/*
B
Bruce Momjian 已提交
3800 3801 3802
	 * init file is broken, so do it the hard way.	We don't bother trying to
	 * free the clutter we just allocated; it's not in the relcache so it
	 * won't hurt.
3803
	 */
3804
read_failed:
3805 3806 3807 3808
	pfree(rels);
	FreeFile(fp);

	return false;
3809 3810
}

3811 3812 3813 3814
/*
 * Write out a new initialization file with the current contents
 * of the relcache.
 */
3815
static void
3816
write_relcache_init_file(void)
3817
{
3818
	FILE	   *fp;
3819 3820
	char		tempfilename[MAXPGPATH];
	char		finalfilename[MAXPGPATH];
3821
	int			magic;
3822
	HASH_SEQ_STATUS status;
3823
	RelIdCacheEnt *idhentry;
3824 3825
	MemoryContext oldcxt;
	int			i;
3826 3827

	/*
3828
	 * We must write a temporary file and rename it into place. Otherwise,
B
Bruce Momjian 已提交
3829 3830
	 * another backend starting at about the same time might crash trying to
	 * read the partially-complete file.
3831
	 */
3832 3833 3834 3835
	snprintf(tempfilename, sizeof(tempfilename), "%s/%s.%d",
			 DatabasePath, RELCACHE_INIT_FILENAME, MyProcPid);
	snprintf(finalfilename, sizeof(finalfilename), "%s/%s",
			 DatabasePath, RELCACHE_INIT_FILENAME);
3836

3837 3838 3839 3840
	unlink(tempfilename);		/* in case it exists w/wrong permissions */

	fp = AllocateFile(tempfilename, PG_BINARY_W);
	if (fp == NULL)
3841 3842 3843 3844 3845
	{
		/*
		 * We used to consider this a fatal error, but we might as well
		 * continue with backend startup ...
		 */
3846 3847
		ereport(WARNING,
				(errcode_for_file_access(),
3848
				 errmsg("could not create relation-cache initialization file \"%s\": %m",
3849
						tempfilename),
B
Bruce Momjian 已提交
3850
			  errdetail("Continuing anyway, but there's something wrong.")));
3851 3852
		return;
	}
3853

3854
	/*
B
Bruce Momjian 已提交
3855
	 * Write a magic number to serve as a file version identifier.	We can
3856 3857 3858 3859 3860 3861
	 * change the magic number whenever the relcache layout changes.
	 */
	magic = RELCACHE_INIT_FILEMAGIC;
	if (fwrite(&magic, 1, sizeof(magic), fp) != sizeof(magic))
		elog(FATAL, "could not write init file");

3862
	/*
3863
	 * Write all the reldescs (in no particular order).
H
Hiroshi Inoue 已提交
3864
	 */
3865
	hash_seq_init(&status, RelationIdCache);
3866

3867
	initFileRelationIds = NIL;
3868

3869
	while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
3870
	{
3871
		Relation	rel = idhentry->reldesc;
3872
		Form_pg_class relform = rel->rd_rel;
3873

B
Bruce Momjian 已提交
3874 3875
		/* first write the relcache entry proper */
		write_item(rel, sizeof(RelationData), fp);
3876 3877

		/* next write the relation tuple form */
B
Bruce Momjian 已提交
3878
		write_item(relform, CLASS_TUPLE_SIZE, fp);
3879 3880 3881 3882

		/* next, do all the attribute tuple form data entries */
		for (i = 0; i < relform->relnatts; i++)
		{
3883
			write_item(rel->rd_att->attrs[i], ATTRIBUTE_TUPLE_SIZE, fp);
3884 3885
		}

B
Bruce Momjian 已提交
3886 3887
		/* next, do the access method specific field */
		write_item(rel->rd_options,
3888
				   (rel->rd_options ? VARSIZE(rel->rd_options) : 0),
3889
				   fp);
B
Bruce Momjian 已提交
3890

3891 3892 3893 3894
		/* If it's an index, there's more to do */
		if (rel->rd_rel->relkind == RELKIND_INDEX)
		{
			Form_pg_am	am = rel->rd_am;
3895

3896 3897
			/* write the pg_index tuple */
			/* we assume this was created by heap_copytuple! */
B
Bruce Momjian 已提交
3898
			write_item(rel->rd_indextuple,
3899 3900
					   HEAPTUPLESIZE + rel->rd_indextuple->t_len,
					   fp);
3901 3902

			/* next, write the access method tuple form */
B
Bruce Momjian 已提交
3903
			write_item(am, sizeof(FormData_pg_am), fp);
3904

3905 3906 3907 3908 3909 3910 3911 3912 3913 3914
			/* next, write the vector of opfamily OIDs */
			write_item(rel->rd_opfamily,
					   relform->relnatts * sizeof(Oid),
					   fp);

			/* next, write the vector of opcintype OIDs */
			write_item(rel->rd_opcintype,
					   relform->relnatts * sizeof(Oid),
					   fp);

3915
			/* next, write the vector of operator OIDs */
3916 3917 3918
			write_item(rel->rd_operator,
					   relform->relnatts * (am->amstrategies * sizeof(Oid)),
					   fp);
3919

3920
			/* next, write the vector of support procedures */
3921
			write_item(rel->rd_support,
B
Bruce Momjian 已提交
3922
				  relform->relnatts * (am->amsupport * sizeof(RegProcedure)),
3923
					   fp);
3924 3925 3926 3927 3928

			/* finally, write the vector of indoption values */
			write_item(rel->rd_indoption,
					   relform->relnatts * sizeof(int16),
					   fp);
3929
		}
3930

3931 3932
		/* also make a list of their OIDs, for RelationIdIsInInitFile */
		oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
3933
		initFileRelationIds = lcons_oid(RelationGetRelid(rel),
B
Bruce Momjian 已提交
3934
										initFileRelationIds);
3935
		MemoryContextSwitchTo(oldcxt);
3936
	}
3937

3938 3939
	if (FreeFile(fp))
		elog(FATAL, "could not write init file");
3940

3941
	/*
3942
	 * Now we have to check whether the data we've so painstakingly
B
Bruce Momjian 已提交
3943 3944 3945 3946 3947
	 * accumulated is already obsolete due to someone else's just-committed
	 * catalog changes.  If so, we just delete the temp file and leave it to
	 * the next backend to try again.  (Our own relcache entries will be
	 * updated by SI message processing, but we can't be sure whether what we
	 * wrote out was up-to-date.)
3948
	 *
B
Bruce Momjian 已提交
3949 3950
	 * This mustn't run concurrently with RelationCacheInitFileInvalidate, so
	 * grab a serialization lock for the duration.
3951
	 */
3952 3953 3954 3955 3956 3957
	LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE);

	/* Make sure we have seen all incoming SI messages */
	AcceptInvalidationMessages();

	/*
B
Bruce Momjian 已提交
3958 3959
	 * If we have received any SI relcache invals since backend start, assume
	 * we may have written out-of-date data.
3960 3961
	 */
	if (relcacheInvalsReceived == 0L)
3962 3963
	{
		/*
3964 3965
		 * OK, rename the temp file to its final name, deleting any
		 * previously-existing init file.
3966
		 *
3967 3968 3969 3970
		 * Note: a failure here is possible under Cygwin, if some other
		 * backend is holding open an unlinked-but-not-yet-gone init file. So
		 * treat this as a noncritical failure; just remove the useless temp
		 * file on failure.
3971
		 */
3972 3973
		if (rename(tempfilename, finalfilename) < 0)
			unlink(tempfilename);
3974 3975 3976 3977
	}
	else
	{
		/* Delete the already-obsolete temp file */
3978 3979
		unlink(tempfilename);
	}
3980 3981

	LWLockRelease(RelCacheInitLock);
3982 3983
}

3984 3985 3986 3987 3988 3989 3990 3991 3992 3993
/*
 * write_item -- write a chunk of data preceded by its length
 *
 * The item is stored as a Size length word followed by "len" bytes taken
 * from "data".  A zero-length item (e.g. a relation whose rd_options is
 * NULL) consists of the length word only; "data" is not touched in that
 * case and may be NULL.
 *
 * Any write failure is reported with elog(FATAL).
 */
static void
write_item(const void *data, Size len, FILE *fp)
{
	if (fwrite(&len, 1, sizeof(len), fp) != sizeof(len))
		elog(FATAL, "could not write init file");

	/* don't hand fwrite a possibly-NULL buffer when there is nothing to write */
	if (len > 0 && fwrite(data, 1, len, fp) != len)
		elog(FATAL, "could not write init file");
}

3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005
/*
 * RelationIdIsInInitFile -- is the relation with this OID kept in the
 * init file?
 *
 * The answer comes from initFileRelationIds, the OID list that is built
 * whenever the init file is loaded or written.
 *
 * This effectively assumes that every backend running in a database would
 * choose to store the same set of relations in the init file; otherwise
 * there are cases where the need for an init file invalidation would go
 * undetected.  That does not seem likely to be a problem in practice.
 */
bool
RelationIdIsInInitFile(Oid relationId)
{
	bool		found;

	found = list_member_oid(initFileRelationIds, relationId);

	return found;
}

/*
 * RelationCacheInitFileInvalidate -- remove the init file during commit of
 * a transaction that changed one or more of the relation cache entries
 * that are kept in the init file.
 *
 * The file has to be removed twice: once just before sending the SI
 * messages that include relcache inval for such relations (beforeSend is
 * true), and once just after sending them (beforeSend is false).
 *
 * The unlink before ensures that a backend that's currently starting cannot
 * read the now-obsolete init file and then miss the SI messages that will
 * force it to update its relcache entries.  (This works because the backend
 * startup sequence gets into the PGPROC array before trying to load the
 * init file.)
 *
 * The unlink after synchronizes with a backend that may currently be trying
 * to write an init file based on data that we've just rendered invalid.
 * Such a backend will see the SI messages, but we can't leave the init file
 * sitting around to fool later backends.
 *
 * Failure to unlink is ignored, since the file might not be there if no
 * backend has been started since the last removal.
 */
void
RelationCacheInitFileInvalidate(bool beforeSend)
{
	char		path[MAXPGPATH];

	snprintf(path, sizeof(path), "%s/%s",
			 DatabasePath, RELCACHE_INIT_FILENAME);

	if (beforeSend)
	{
		/* the pre-send removal needs no interlock */
		unlink(path);
		return;
	}

	/*
	 * The post-send removal must be interlocked against
	 * write_relcache_init_file, to guard against the possibility that
	 * someone renames a new-but-already-obsolete init file into place just
	 * after we unlink.  With the interlock, it's certain that
	 * write_relcache_init_file will notice our SI inval message before
	 * renaming into place, or else that we will execute second and
	 * successfully unlink the file.
	 */
	LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE);
	unlink(path);
	LWLockRelease(RelCacheInitLock);
}
4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077

/*
 * RelationCacheInitFileRemove -- remove the init file of one database
 * during postmaster startup.
 *
 * dbPath is the directory of the database whose init file is to go.
 *
 * The init file used to be kept across restarts, but that is unsafe in PITR
 * scenarios, and even in simple crash-recovery cases there are windows for
 * the init file to become out-of-sync with the database.  So it is simply
 * removed during startup, and the first backend launch is expected to
 * rebuild it.  This has to happen in each database of the cluster; for
 * simplicity it is driven by flatfiles.c, which has to scan pg_database
 * anyway.
 */
void
RelationCacheInitFileRemove(const char *dbPath)
{
	char		path[MAXPGPATH];

	snprintf(path, sizeof(path), "%s/%s",
			 dbPath, RELCACHE_INIT_FILENAME);

	/* any error is ignored: the file might not be there at all */
	(void) unlink(path);
}