/*-------------------------------------------------------------------------
 *
 * shmem.c--
 *	  create shared memory and initialize shared memory data structures.
 *
 * Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $Header: /cvsroot/pgsql/src/backend/storage/ipc/shmem.c,v 1.35 1999/02/03 21:17:16 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
/*
 * POSTGRES processes share one or more regions of shared memory.
 * The shared memory is created by a postmaster and is inherited
 * by each backend via fork().  The routines in this file are used for
 * allocating and binding to shared memory data structures.
 *
 * NOTES:
 *		(a) There are three kinds of shared memory data structures
 *	available to POSTGRES: fixed-size structures, queues and hash
 *	tables.  Fixed-size structures contain things like global variables
 *	for a module and should never be allocated after the process
 *	initialization phase.  Hash tables have a fixed maximum size, but
 *	their actual size can vary dynamically.  When entries are added
 *	to the table, more space is allocated.  Queues link data structures
 *	that have been allocated either as fixed-size structures or as hash
 *	buckets.  Each shared data structure has a string name to identify
 *	it (assigned in the module that declares it).
 *
 *		(b) During initialization, each module looks for its
 *	shared data structures in a hash table called the "Shmem Index".
 *	If the data structure is not present, the caller can allocate
 *	a new one and initialize it.  If the data structure is present,
 *	the caller "attaches" to the structure by initializing a pointer
 *	in the local address space.
 *		The shmem index has two purposes: first, it gives us
 *	a simple model of how the world looks when a backend process
 *	initializes.  If something is present in the shmem index,
 *	it is initialized.  If it is not, it is uninitialized.  Second,
 *	the shmem index allows us to allocate shared memory on demand
 *	instead of trying to preallocate structures and hard-wire the
 *	sizes and locations in header files.  If you are using a lot
 *	of shared memory in a lot of different places (and changing
 *	things during development), this is important.
 *
 *		(c) Memory allocation model: shared memory can never be
 *	freed once allocated.  Each hash table has its own free list,
 *	so hash buckets can be reused when an item is deleted.  However,
 *	if one hash table grows very large and then shrinks, its space
 *	cannot be redistributed to other tables.  We could build a simple
 *	hash bucket garbage collector if need be.  Right now, it seems
 *	unnecessary.
 *
 *		See InitSem() in sem.c for an example of how to use the
 *	shmem index.
 *
 */
#include <stdio.h>
#include <string.h>
62

63 64 65 66
#include "postgres.h"
#include "storage/ipc.h"
#include "storage/shmem.h"
#include "storage/spin.h"
67
#include "storage/proc.h"
B
Bruce Momjian 已提交
68
#include "utils/dynahash.h"
69
#include "utils/hsearch.h"
70
#include "utils/memutils.h"
V
Vadim B. Mikheev 已提交
71 72
#include "access/xact.h"
#include "utils/tqual.h"
73 74 75

/* shared memory global variables */

76
unsigned long ShmemBase = 0;	/* start and end address of shared memory */
77 78
static unsigned long ShmemEnd = 0;
static unsigned long ShmemSize = 0;		/* current size (and default) */
79

80
extern VariableCache ShmemVariableCache;		/* varsup.c */
81

82
SPINLOCK	ShmemLock;			/* lock for shared memory allocation */
83

84
SPINLOCK	ShmemIndexLock;		/* lock for shmem index access */
85

86 87 88
static unsigned long *ShmemFreeStart = NULL;	/* pointer to the OFFSET
												 * of first free shared
												 * memory */
89 90 91
static unsigned long *ShmemIndexOffset = NULL;	/* start of the shmem
												 * index table (for
												 * bootstrap) */
92
static int	ShmemBootstrap = FALSE;		/* flag becomes true when shared
93
										 * mem is created by POSTMASTER */
94

95
static HTAB *ShmemIndex = NULL;
96 97

/* ---------------------
98
 * ShmemIndexReset() - Resets the shmem index to NULL....
99 100 101 102 103
 * useful when the postmaster destroys existing shared memory
 * and creates all new segments after a backend crash.
 * ----------------------
 */
void
104
ShmemIndexReset(void)
105
{
106
	ShmemIndex = (HTAB *) NULL;
107 108 109
}

/*
 *	ShmemCreate() --
 *
 *	This routine is called once by the postmaster to create the
 *	shared memory region.  Assume there is only one postmaster so
 *	no synchronization is necessary until after this routine
 *	completes successfully.
 *
 * key is a unique identifier for the shmem region.
 * size is the size of the region; zero means "keep the current
 *		(default) ShmemSize".
 *
 * On success the region id is remembered in ShmemId and ShmemBootstrap
 * is set so that InitShmem() knows the region still needs initializing.
 */
static IpcMemoryId ShmemId;

void
ShmemCreate(unsigned int key, unsigned int size)
{
	/* a non-zero request overrides the current/default size */
	if (size != 0)
		ShmemSize = size;

	/* create shared mem region */
	ShmemId = IpcMemoryCreate(key, ShmemSize, IPCProtection);
	if (ShmemId == IpcMemCreationFailed)
	{
		elog(FATAL, "ShmemCreate: cannot create region");
		exit(1);
	}

	/*
	 * ShmemBootstrap is true if shared memory has been created, but not
	 * yet initialized.  Only the postmaster/creator-of-all-things should
	 * have this flag set.
	 */
	ShmemBootstrap = TRUE;
}

/*
144 145
 *	InitShmem() -- map region into process address space
 *		and initialize shared data structures.
146 147 148 149 150
 *
 */
int
InitShmem(unsigned int key, unsigned int size)
{
151 152
	Pointer		sharedRegion;
	unsigned long currFreeSpace;
153

154 155
	HASHCTL		info;
	int			hash_flags;
156
	ShmemIndexEnt *result,
157 158 159
				item;
	bool		found;
	IpcMemoryId shmid;
160

161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176
	/* if zero key, use default memory size */
	if (size)
		ShmemSize = size;

	/* default key is 0 */

	/* attach to shared memory region (SysV or BSD OS specific) */
	if (ShmemBootstrap && key == PrivateIPCKey)
		/* if we are running backend alone */
		shmid = ShmemId;
	else
		shmid = IpcMemoryIdGet(IPCKeyGetBufferMemoryKey(key), ShmemSize);
	sharedRegion = IpcMemoryAttach(shmid);
	if (sharedRegion == NULL)
	{
		elog(FATAL, "AttachSharedRegion: couldn't attach to shmem\n");
177
		return FALSE;
178 179 180 181 182 183 184 185 186
	}

	/* get pointers to the dimensions of shared memory */
	ShmemBase = (unsigned long) sharedRegion;
	ShmemEnd = (unsigned long) sharedRegion + ShmemSize;
	currFreeSpace = 0;

	/* First long in shared memory is the count of available space */
	ShmemFreeStart = (unsigned long *) ShmemBase;
187 188
	/* next is a shmem pointer to the shmem index */
	ShmemIndexOffset = ShmemFreeStart + 1;
189 190
	/* next is ShmemVariableCache */
	ShmemVariableCache = (VariableCache) (ShmemIndexOffset + 1);
191

192
	currFreeSpace += sizeof(ShmemFreeStart) + sizeof(ShmemIndexOffset) +
193
		LONGALIGN(sizeof(VariableCacheData));
194 195 196

	/*
	 * bootstrap initialize spin locks so we can start to use the
197
	 * allocator and shmem index.
198
	 */
199
	if (!InitSpinLocks(ShmemBootstrap, IPCKeyGetSpinLockSemaphoreKey(key)))
200
		return FALSE;
201 202 203 204 205 206

	/*
	 * We have just allocated additional space for two spinlocks. Now
	 * setup the global free space count
	 */
	if (ShmemBootstrap)
207
	{
208
		*ShmemFreeStart = currFreeSpace;
209
		memset(ShmemVariableCache, 0, sizeof(*ShmemVariableCache));
210
	}
211 212 213 214

	/* if ShmemFreeStart is NULL, then the allocator won't work */
	Assert(*ShmemFreeStart);

215 216 217
	/* create OR attach to the shared memory shmem index */
	info.keysize = SHMEM_INDEX_KEYSIZE;
	info.datasize = SHMEM_INDEX_DATASIZE;
218 219
	hash_flags = (HASH_ELEM);

220 221
	/* This will acquire the shmem index lock, but not release it. */
	ShmemIndex = ShmemInitHash("ShmemIndex",
222 223
							   SHMEM_INDEX_SIZE, SHMEM_INDEX_SIZE,
							   &info, hash_flags);
224

225
	if (!ShmemIndex)
226
	{
227
		elog(FATAL, "InitShmem: couldn't initialize Shmem Index");
228
		return FALSE;
229 230 231
	}

	/*
232
	 * Now, check the shmem index for an entry to the shmem index.	If
233 234
	 * there is an entry there, someone else created the table. Otherwise,
	 * we did and we have to initialize it.
235
	 */
236 237
	MemSet(item.key, 0, SHMEM_INDEX_KEYSIZE);
	strncpy(item.key, "ShmemIndex", SHMEM_INDEX_KEYSIZE);
238

239 240
	result = (ShmemIndexEnt *)
		hash_search(ShmemIndex, (char *) &item, HASH_ENTER, &found);
241 242 243 244


	if (!result)
	{
245
		elog(FATAL, "InitShmem: corrupted shmem index");
246
		return FALSE;
247 248 249 250 251 252
	}

	if (!found)
	{

		/*
253
		 * bootstrapping shmem: we have to initialize the shmem index now.
254 255 256
		 */

		Assert(ShmemBootstrap);
257 258 259
		result->location = MAKE_OFFSET(ShmemIndex->hctl);
		*ShmemIndexOffset = result->location;
		result->size = SHMEM_INDEX_SIZE;
260 261 262 263 264 265 266

		ShmemBootstrap = FALSE;

	}
	else
		Assert(!ShmemBootstrap);
	/* now release the lock acquired in ShmemHashInit */
267
	SpinRelease(ShmemIndexLock);
268

269
	Assert(result->location == MAKE_OFFSET(ShmemIndex->hctl));
270

271
	return TRUE;
272 273 274 275
}

/*
 * ShmemAlloc -- allocate word-aligned byte string from
276
 *		shared memory
277 278 279
 *
 * Assumes ShmemLock and ShmemFreeStart are initialized.
 * Returns: real pointer to memory or NULL if we are out
280 281
 *		of space.  Has to return a real pointer in order
 *		to be compatable with malloc().
282
 */
283
long *
284 285
ShmemAlloc(unsigned long size)
{
286 287
	unsigned long tmpFree;
	long	   *newSpace;
288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315

	/*
	 * ensure space is word aligned.
	 *
	 * Word-alignment is not good enough. We have to be more conservative:
	 * doubles need 8-byte alignment. (We probably only need this on RISC
	 * platforms but this is not a big waste of space.) - ay 12/94
	 */
	if (size % sizeof(double))
		size += sizeof(double) - (size % sizeof(double));

	Assert(*ShmemFreeStart);

	SpinAcquire(ShmemLock);

	tmpFree = *ShmemFreeStart + size;
	if (tmpFree <= ShmemSize)
	{
		newSpace = (long *) MAKE_PTR(*ShmemFreeStart);
		*ShmemFreeStart += size;
	}
	else
		newSpace = NULL;

	SpinRelease(ShmemLock);

	if (!newSpace)
		elog(NOTICE, "ShmemAlloc: out of memory ");
316
	return newSpace;
317 318 319
}

/*
320 321
 * ShmemIsValid -- test if an offset refers to valid shared memory
 *
322 323 324 325 326
 * Returns TRUE if the pointer is valid.
 */
int
ShmemIsValid(unsigned long addr)
{
327
	return (addr < ShmemEnd) && (addr >= ShmemBase);
328 329 330
}

/*
331 332
 * ShmemInitHash -- Create/Attach to and initialize
 *		shared memory hash table.
333 334 335 336 337 338 339 340
 *
 * Notes:
 *
 * assume caller is doing some kind of synchronization
 * so that two people dont try to create/initialize the
 * table at once.  Use SpinAlloc() to create a spinlock
 * for the structure before creating the structure itself.
 */
341
HTAB *
342
ShmemInitHash(char *name,		/* table string name for shmem index */
343 344
			  long init_size,	/* initial size */
			  long max_size,	/* max size of the table */
345
			  HASHCTL *infoP,	/* info about key and bucket size */
346
			  int hash_flags)	/* info about infoP */
347
{
348 349
	bool		found;
	long	   *location;
350 351 352 353 354 355 356 357 358 359 360

	/*
	 * shared memory hash tables have a fixed max size so that the control
	 * structures don't try to grow.  The segbase is for calculating
	 * pointer values.	The shared memory allocator must be specified.
	 */
	infoP->segbase = (long *) ShmemBase;
	infoP->alloc = ShmemAlloc;
	infoP->max_size = max_size;
	hash_flags |= HASH_SHARED_MEM;

361
	/* look it up in the shmem index */
362
	location = ShmemInitStruct(name, my_log2(max_size) + sizeof(HHDR), &found);
363 364

	/*
365
	 * shmem index is corrupted.	Let someone else give the error
366 367 368
	 * message since they have more information
	 */
	if (location == NULL)
369
		return 0;
370 371 372 373 374 375 376 377 378 379 380 381 382 383

	/*
	 * it already exists, attach to it rather than allocate and initialize
	 * new space
	 */
	if (found)
		hash_flags |= HASH_ATTACH;

	/* these structures were allocated or bound in ShmemInitStruct */
	/* control information and parameters */
	infoP->hctl = (long *) location;
	/* directory for hash lookup */
	infoP->dir = (long *) (location + sizeof(HHDR));

384
	return hash_create(init_size, infoP, hash_flags);;
385 386 387 388 389 390
}

/*
 * ShmemPIDLookup -- lookup process data structure using process id
 *
 * Returns: TRUE if no error.  locationPtr is initialized if PID is
391
 *		found in the shmem index.
392 393
 *
 * NOTES:
394 395
 *		only information about success or failure is the value of
 *		locationPtr.
396 397
 */
bool
398
ShmemPIDLookup(int pid, SHMEM_OFFSET *locationPtr)
399
{
400
	ShmemIndexEnt *result,
401 402
				item;
	bool		found;
403

404 405
	Assert(ShmemIndex);
	MemSet(item.key, 0, SHMEM_INDEX_KEYSIZE);
406 407
	sprintf(item.key, "PID %d", pid);

408 409 410
	SpinAcquire(ShmemIndexLock);
	result = (ShmemIndexEnt *)
		hash_search(ShmemIndex, (char *) &item, HASH_ENTER, &found);
411 412 413 414

	if (!result)
	{

415 416
		SpinRelease(ShmemIndexLock);
		elog(ERROR, "ShmemInitPID: ShmemIndex corrupted");
417
		return FALSE;
418 419 420 421 422 423 424 425

	}

	if (found)
		*locationPtr = result->location;
	else
		result->location = *locationPtr;

426
	SpinRelease(ShmemIndexLock);
427
	return TRUE;
428 429 430
}

/*
431
 * ShmemPIDDestroy -- destroy shmem index entry for process
432
 *		using process id
433 434
 *
 * Returns: offset of the process struct in shared memory or
435
 *		INVALID_OFFSET if not found.
436
 *
437
 * Side Effect: removes the entry from the shmem index
438 439 440 441
 */
SHMEM_OFFSET
ShmemPIDDestroy(int pid)
{
442
	ShmemIndexEnt *result,
443 444 445
				item;
	bool		found;
	SHMEM_OFFSET location = 0;
446

447
	Assert(ShmemIndex);
448

449
	MemSet(item.key, 0, SHMEM_INDEX_KEYSIZE);
450 451
	sprintf(item.key, "PID %d", pid);

452 453 454
	SpinAcquire(ShmemIndexLock);
	result = (ShmemIndexEnt *)
		hash_search(ShmemIndex, (char *) &item, HASH_REMOVE, &found);
455 456 457

	if (found)
		location = result->location;
458
	SpinRelease(ShmemIndexLock);
459 460 461 462

	if (!result)
	{

463
		elog(ERROR, "ShmemPIDDestroy: PID table corrupted");
464
		return INVALID_OFFSET;
465 466 467 468

	}

	if (found)
469
		return location;
470
	else
471
		return INVALID_OFFSET;
472 473 474 475
}

/*
 * ShmemInitStruct -- Create/attach to a structure in shared
476
 *		memory.
477
 *
478 479 480 481 482
 *	This is called during initialization to find or allocate
 *		a data structure in shared memory.	If no other processes
 *		have created the structure, this routine allocates space
 *		for it.  If it exists already, a pointer to the existing
 *		table is returned.
483
 *
484
 *	Returns: real pointer to the object.  FoundPtr is TRUE if
485
 *		the object is already in the shmem index (hence, already
486
 *		initialized).
487
 */
488
long *
489
ShmemInitStruct(char *name, unsigned long size, bool *foundPtr)
490
{
491
	ShmemIndexEnt *result,
492 493
				item;
	long	   *structPtr;
494

495
	strncpy(item.key, name, SHMEM_INDEX_KEYSIZE);
496 497
	item.location = BAD_LOCATION;

498
	SpinAcquire(ShmemIndexLock);
B
Bruce Momjian 已提交
499

500
	if (!ShmemIndex)
501
	{
B
Bruce Momjian 已提交
502
#ifdef USE_ASSERT_CHECKING
503
		char	   *strname = "ShmemIndex";
504

505
#endif
506

507
		/*
508
		 * If the shmem index doesnt exist, we fake it.
509
		 *
510
		 * If we are creating the first shmem index, then let shmemalloc()
511
		 * allocate the space for a new HTAB.  Otherwise, find the old one
512 513
		 * and return that.  Notice that the ShmemIndexLock is held until
		 * the shmem index has been completely initialized.
514 515 516 517 518 519 520
		 */
		Assert(!strcmp(name, strname));
		if (ShmemBootstrap)
		{
			/* in POSTMASTER/Single process */

			*foundPtr = FALSE;
521
			return (long *) ShmemAlloc(size);
522 523 524
		}
		else
		{
525
			Assert(ShmemIndexOffset);
526 527

			*foundPtr = TRUE;
528
			return (long *) MAKE_PTR(*ShmemIndexOffset);
529 530 531
		}


532
	}
533 534
	else
	{
535 536 537
		/* look it up in the shmem index */
		result = (ShmemIndexEnt *)
			hash_search(ShmemIndex, (char *) &item, HASH_ENTER, foundPtr);
538 539 540 541
	}

	if (!result)
	{
542
		SpinRelease(ShmemIndexLock);
543

544
		elog(ERROR, "ShmemInitStruct: Shmem Index corrupted");
545
		return NULL;
546

547
	}
548 549
	else if (*foundPtr)
	{
550

551
		/*
552
		 * Structure is in the shmem index so someone else has allocated
553 554 555 556 557
		 * it already.	The size better be the same as the size we are
		 * trying to initialize to or there is a name conflict (or worse).
		 */
		if (result->size != size)
		{
558
			SpinRelease(ShmemIndexLock);
559

560
			elog(NOTICE, "ShmemInitStruct: ShmemIndex entry size is wrong");
561
			/* let caller print its message too */
562
			return NULL;
563 564 565 566 567 568 569 570 571 572
		}
		structPtr = (long *) MAKE_PTR(result->location);
	}
	else
	{
		/* It isn't in the table yet. allocate and initialize it */
		structPtr = ShmemAlloc((long) size);
		if (!structPtr)
		{
			/* out of memory */
573 574 575
			Assert(ShmemIndex);
			hash_search(ShmemIndex, (char *) &item, HASH_REMOVE, foundPtr);
			SpinRelease(ShmemIndexLock);
576 577 578 579
			*foundPtr = FALSE;

			elog(NOTICE, "ShmemInitStruct: cannot allocate '%s'",
				 name);
580
			return NULL;
581 582 583 584 585 586
		}
		result->size = size;
		result->location = MAKE_OFFSET(structPtr);
	}
	Assert(ShmemIsValid((unsigned long) structPtr));

587
	SpinRelease(ShmemIndexLock);
588
	return structPtr;
589 590
}

591 592 593
/*
 * TransactionIdIsInProgress -- is given transaction running by some backend
 *
594
 * Strange place for this func, but we have to lookup process data structures
595 596 597
 * for all running backends. - vadim 11/26/96
 */
bool
598
TransactionIdIsInProgress(TransactionId xid)
599
{
600
	ShmemIndexEnt *result;
601
	PROC	   *proc;
602

603
	Assert(ShmemIndex);
604

605
	SpinAcquire(ShmemIndexLock);
606 607

	hash_seq((HTAB *) NULL);
608
	while ((result = (ShmemIndexEnt *) hash_seq(ShmemIndex)) != NULL)
609
	{
610
		if (result == (ShmemIndexEnt *) TRUE)
611
		{
612
			SpinRelease(ShmemIndexLock);
613
			return false;
614 615 616 617 618 619 620
		}
		if (result->location == INVALID_OFFSET ||
			strncmp(result->key, "PID ", 4) != 0)
			continue;
		proc = (PROC *) MAKE_PTR(result->location);
		if (proc->xid == xid)
		{
621
			SpinRelease(ShmemIndexLock);
622
			return true;
623
		}
624
	}
625

626 627
	SpinRelease(ShmemIndexLock);
	elog(ERROR, "TransactionIdIsInProgress: ShmemIndex corrupted");
628
	return false;
629
}
630 631 632 633 634 635 636

/*
 * GetSnapshotData -- returns information about running transactions.
 *
 * Yet another strange func for this place...	- vadim 07/21/98
 */
Snapshot
637
GetSnapshotData(bool serializable)
638
{
639 640 641 642
	Snapshot	snapshot = (Snapshot) malloc(sizeof(SnapshotData));
	ShmemIndexEnt *result;
	PROC	   *proc;
	TransactionId cid = GetCurrentTransactionId();
V
Vadim B. Mikheev 已提交
643
	uint32		count = 0;
V
Vadim B. Mikheev 已提交
644
	uint32		have = 32;
645 646

	Assert(ShmemIndex);
647

V
Vadim B. Mikheev 已提交
648
	snapshot->xip = (TransactionId *) malloc(have * sizeof(TransactionId));
649
	snapshot->xmin = cid;
650 651 652 653
	if (serializable)
		snapshot->xmax = cid;
	else
		ReadNewTransactionId(&(snapshot->xmax));
654 655 656 657 658 659 660 661 662 663 664

	SpinAcquire(ShmemIndexLock);

	hash_seq((HTAB *) NULL);
	while ((result = (ShmemIndexEnt *) hash_seq(ShmemIndex)) != NULL)
	{
		if (result == (ShmemIndexEnt *) TRUE)
		{
			if (MyProc->xmin == InvalidTransactionId)
				MyProc->xmin = snapshot->xmin;
			SpinRelease(ShmemIndexLock);
V
Vadim B. Mikheev 已提交
665
			snapshot->xcnt = count;
666
			return snapshot;
667 668 669 670 671
		}
		if (result->location == INVALID_OFFSET ||
			strncmp(result->key, "PID ", 4) != 0)
			continue;
		proc = (PROC *) MAKE_PTR(result->location);
V
Vadim B. Mikheev 已提交
672
		if (proc == MyProc || proc->xid < FirstTransactionId)
673 674 675 676 677
			continue;
		if (proc->xid < snapshot->xmin)
			snapshot->xmin = proc->xid;
		else if (proc->xid > snapshot->xmax)
			snapshot->xmax = proc->xid;
V
Vadim B. Mikheev 已提交
678
		if (have == 0)
679
		{
680
			snapshot->xip = (TransactionId *) realloc(snapshot->xip,
V
Vadim B. Mikheev 已提交
681
								   (count + 32) * sizeof(TransactionId));
V
Vadim B. Mikheev 已提交
682
			have = 32;
683 684
		}
		snapshot->xip[count] = proc->xid;
V
Vadim B. Mikheev 已提交
685
		have--;
686 687 688 689 690 691 692
		count++;
	}

	SpinRelease(ShmemIndexLock);
	free(snapshot->xip);
	free(snapshot);
	elog(ERROR, "GetSnapshotData: ShmemIndex corrupted");
693
	return NULL;
694
}