/*-------------------------------------------------------------------------
 *
 * shmem.c--
 *	  create shared memory and initialize shared memory data structures.
 *
 * Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $Header: /cvsroot/pgsql/src/backend/storage/ipc/shmem.c,v 1.34 1998/12/18 09:10:34 vadim Exp $
 *
 *-------------------------------------------------------------------------
 */
/*
 * POSTGRES processes share one or more regions of shared memory.
 * The shared memory is created by a postmaster and is inherited
 * by each backend via fork().  The routines in this file are used for
 * allocating and binding to shared memory data structures.
 *
 * NOTES:
 *		(a) There are three kinds of shared memory data structures
 *	available to POSTGRES: fixed-size structures, queues and hash
 *	tables.  Fixed-size structures contain things like global variables
 *	for a module and should never be allocated after the process
 *	initialization phase.  Hash tables have a fixed maximum size, but
 *	their actual size can vary dynamically.  When entries are added
 *	to the table, more space is allocated.  Queues link data structures
 *	that have been allocated either as fixed size structures or as hash
 *	buckets.  Each shared data structure has a string name to identify
 *	it (assigned in the module that declares it).
 *
 *		(b) During initialization, each module looks for its
 *	shared data structures in a hash table called the "Shmem Index".
 *	If the data structure is not present, the caller can allocate
 *	a new one and initialize it.  If the data structure is present,
 *	the caller "attaches" to the structure by initializing a pointer
 *	in the local address space.
 *		The shmem index has two purposes: first, it gives us
 *	a simple model of how the world looks when a backend process
 *	initializes.  If something is present in the shmem index,
 *	it is initialized.  If it is not, it is uninitialized.  Second,
 *	the shmem index allows us to allocate shared memory on demand
 *	instead of trying to preallocate structures and hard-wire the
 *	sizes and locations in header files.  If you are using a lot
 *	of shared memory in a lot of different places (and changing
 *	things during development), this is important.
 *
 *		(c) memory allocation model: shared memory can never be
 *	freed, once allocated.  Each hash table has its own free list,
 *	so hash buckets can be reused when an item is deleted.  However,
 *	if one hash table grows very large and then shrinks, its space
 *	cannot be redistributed to other tables.  We could build a simple
 *	hash bucket garbage collector if need be.  Right now, it seems
 *	unnecessary.
 *
 *		See InitSem() in sem.c for an example of how to use the
 *	shmem index.
 *
 */
#include <stdio.h>
#include <string.h>
62

63 64 65 66
#include "postgres.h"
#include "storage/ipc.h"
#include "storage/shmem.h"
#include "storage/spin.h"
67
#include "storage/proc.h"
B
Bruce Momjian 已提交
68
#include "utils/dynahash.h"
69
#include "utils/hsearch.h"
70
#include "utils/memutils.h"
V
Vadim B. Mikheev 已提交
71 72
#include "access/xact.h"
#include "utils/tqual.h"
73 74 75

/* shared memory global variables */

76
unsigned long ShmemBase = 0;	/* start and end address of shared memory */
77 78
static unsigned long ShmemEnd = 0;
static unsigned long ShmemSize = 0;		/* current size (and default) */
79

80
extern VariableCache ShmemVariableCache;		/* varsup.c */
81

82
SPINLOCK	ShmemLock;			/* lock for shared memory allocation */
83

84
SPINLOCK	ShmemIndexLock;		/* lock for shmem index access */
85

86 87 88
static unsigned long *ShmemFreeStart = NULL;	/* pointer to the OFFSET
												 * of first free shared
												 * memory */
89 90 91
static unsigned long *ShmemIndexOffset = NULL;	/* start of the shmem
												 * index table (for
												 * bootstrap) */
92
static int	ShmemBootstrap = FALSE;		/* flag becomes true when shared
93
										 * mem is created by POSTMASTER */
94

95
static HTAB *ShmemIndex = NULL;
96 97

/* ---------------------
98
 * ShmemIndexReset() - Resets the shmem index to NULL....
99 100 101 102 103
 * useful when the postmaster destroys existing shared memory
 * and creates all new segments after a backend crash.
 * ----------------------
 */
void
104
ShmemIndexReset(void)
105
{
106
	ShmemIndex = (HTAB *) NULL;
107 108 109
}

/*
 *	ShmemCreate() --
 *
 *	This routine is called once by the postmaster to create the
 *	shared memory region.  Assume there is only one postmaster so
 *	no synchronization is necessary until after this routine
 *	completes successfully.
 *
 * key is a unique identifier for the shmem region.
 * size is the size of the region; zero keeps the current ShmemSize.
 */
static IpcMemoryId ShmemId;

void
ShmemCreate(unsigned int key, unsigned int size)
{
	/* a nonzero request overrides the default region size */
	if (size != 0)
		ShmemSize = size;

	/* create shared mem region and remember its id for InitShmem() */
	ShmemId = IpcMemoryCreate(key, ShmemSize, IPCProtection);
	if (ShmemId == IpcMemCreationFailed)
	{
		elog(FATAL, "ShmemCreate: cannot create region");
		exit(1);
	}

	/*
	 * From here until InitShmem() finishes, shared memory exists but is
	 * not yet initialized.  Only the postmaster/creator-of-all-things
	 * should have this flag set.
	 */
	ShmemBootstrap = TRUE;
}

/*
144 145
 *	InitShmem() -- map region into process address space
 *		and initialize shared data structures.
146 147 148 149 150
 *
 */
int
InitShmem(unsigned int key, unsigned int size)
{
151 152
	Pointer		sharedRegion;
	unsigned long currFreeSpace;
153

154 155
	HASHCTL		info;
	int			hash_flags;
156
	ShmemIndexEnt *result,
157 158 159
				item;
	bool		found;
	IpcMemoryId shmid;
160

161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176
	/* if zero key, use default memory size */
	if (size)
		ShmemSize = size;

	/* default key is 0 */

	/* attach to shared memory region (SysV or BSD OS specific) */
	if (ShmemBootstrap && key == PrivateIPCKey)
		/* if we are running backend alone */
		shmid = ShmemId;
	else
		shmid = IpcMemoryIdGet(IPCKeyGetBufferMemoryKey(key), ShmemSize);
	sharedRegion = IpcMemoryAttach(shmid);
	if (sharedRegion == NULL)
	{
		elog(FATAL, "AttachSharedRegion: couldn't attach to shmem\n");
177
		return FALSE;
178 179 180 181 182 183 184 185 186
	}

	/* get pointers to the dimensions of shared memory */
	ShmemBase = (unsigned long) sharedRegion;
	ShmemEnd = (unsigned long) sharedRegion + ShmemSize;
	currFreeSpace = 0;

	/* First long in shared memory is the count of available space */
	ShmemFreeStart = (unsigned long *) ShmemBase;
187 188
	/* next is a shmem pointer to the shmem index */
	ShmemIndexOffset = ShmemFreeStart + 1;
189 190
	/* next is ShmemVariableCache */
	ShmemVariableCache = (VariableCache) (ShmemIndexOffset + 1);
191 192

	currFreeSpace +=
193 194
		sizeof(ShmemFreeStart) + sizeof(ShmemIndexOffset) +
		LONGALIGN(sizeof(VariableCacheData));
195 196 197

	/*
	 * bootstrap initialize spin locks so we can start to use the
198
	 * allocator and shmem index.
199
	 */
200
	if (!InitSpinLocks(ShmemBootstrap, IPCKeyGetSpinLockSemaphoreKey(key)))
201
		return FALSE;
202 203 204 205 206 207

	/*
	 * We have just allocated additional space for two spinlocks. Now
	 * setup the global free space count
	 */
	if (ShmemBootstrap)
208
	{
209
		*ShmemFreeStart = currFreeSpace;
210
		memset(ShmemVariableCache, 0, sizeof(*ShmemVariableCache));
211
	}
212 213 214 215

	/* if ShmemFreeStart is NULL, then the allocator won't work */
	Assert(*ShmemFreeStart);

216 217 218
	/* create OR attach to the shared memory shmem index */
	info.keysize = SHMEM_INDEX_KEYSIZE;
	info.datasize = SHMEM_INDEX_DATASIZE;
219 220
	hash_flags = (HASH_ELEM);

221 222
	/* This will acquire the shmem index lock, but not release it. */
	ShmemIndex = ShmemInitHash("ShmemIndex",
223 224
							   SHMEM_INDEX_SIZE, SHMEM_INDEX_SIZE,
							   &info, hash_flags);
225

226
	if (!ShmemIndex)
227
	{
228
		elog(FATAL, "InitShmem: couldn't initialize Shmem Index");
229
		return FALSE;
230 231 232
	}

	/*
233
	 * Now, check the shmem index for an entry to the shmem index.	If
234 235
	 * there is an entry there, someone else created the table. Otherwise,
	 * we did and we have to initialize it.
236
	 */
237 238
	MemSet(item.key, 0, SHMEM_INDEX_KEYSIZE);
	strncpy(item.key, "ShmemIndex", SHMEM_INDEX_KEYSIZE);
239

240 241
	result = (ShmemIndexEnt *)
		hash_search(ShmemIndex, (char *) &item, HASH_ENTER, &found);
242 243 244 245


	if (!result)
	{
246
		elog(FATAL, "InitShmem: corrupted shmem index");
247
		return FALSE;
248 249 250 251 252 253
	}

	if (!found)
	{

		/*
254
		 * bootstrapping shmem: we have to initialize the shmem index now.
255 256 257
		 */

		Assert(ShmemBootstrap);
258 259 260
		result->location = MAKE_OFFSET(ShmemIndex->hctl);
		*ShmemIndexOffset = result->location;
		result->size = SHMEM_INDEX_SIZE;
261 262 263 264 265 266 267

		ShmemBootstrap = FALSE;

	}
	else
		Assert(!ShmemBootstrap);
	/* now release the lock acquired in ShmemHashInit */
268
	SpinRelease(ShmemIndexLock);
269

270
	Assert(result->location == MAKE_OFFSET(ShmemIndex->hctl));
271

272
	return TRUE;
273 274 275 276
}

/*
 * ShmemAlloc -- allocate word-aligned byte string from
277
 *		shared memory
278 279 280
 *
 * Assumes ShmemLock and ShmemFreeStart are initialized.
 * Returns: real pointer to memory or NULL if we are out
281 282
 *		of space.  Has to return a real pointer in order
 *		to be compatable with malloc().
283
 */
284
long *
285 286
ShmemAlloc(unsigned long size)
{
287 288
	unsigned long tmpFree;
	long	   *newSpace;
289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316

	/*
	 * ensure space is word aligned.
	 *
	 * Word-alignment is not good enough. We have to be more conservative:
	 * doubles need 8-byte alignment. (We probably only need this on RISC
	 * platforms but this is not a big waste of space.) - ay 12/94
	 */
	if (size % sizeof(double))
		size += sizeof(double) - (size % sizeof(double));

	Assert(*ShmemFreeStart);

	SpinAcquire(ShmemLock);

	tmpFree = *ShmemFreeStart + size;
	if (tmpFree <= ShmemSize)
	{
		newSpace = (long *) MAKE_PTR(*ShmemFreeStart);
		*ShmemFreeStart += size;
	}
	else
		newSpace = NULL;

	SpinRelease(ShmemLock);

	if (!newSpace)
		elog(NOTICE, "ShmemAlloc: out of memory ");
317
	return newSpace;
318 319 320
}

/*
321 322
 * ShmemIsValid -- test if an offset refers to valid shared memory
 *
323 324 325 326 327
 * Returns TRUE if the pointer is valid.
 */
int
ShmemIsValid(unsigned long addr)
{
328
	return (addr < ShmemEnd) && (addr >= ShmemBase);
329 330 331
}

/*
332 333
 * ShmemInitHash -- Create/Attach to and initialize
 *		shared memory hash table.
334 335 336 337 338 339 340 341
 *
 * Notes:
 *
 * assume caller is doing some kind of synchronization
 * so that two people dont try to create/initialize the
 * table at once.  Use SpinAlloc() to create a spinlock
 * for the structure before creating the structure itself.
 */
342
HTAB *
343
ShmemInitHash(char *name,		/* table string name for shmem index */
344 345
			  long init_size,	/* initial size */
			  long max_size,	/* max size of the table */
346
			  HASHCTL *infoP,	/* info about key and bucket size */
347
			  int hash_flags)	/* info about infoP */
348
{
349 350
	bool		found;
	long	   *location;
351 352 353 354 355 356 357 358 359 360 361

	/*
	 * shared memory hash tables have a fixed max size so that the control
	 * structures don't try to grow.  The segbase is for calculating
	 * pointer values.	The shared memory allocator must be specified.
	 */
	infoP->segbase = (long *) ShmemBase;
	infoP->alloc = ShmemAlloc;
	infoP->max_size = max_size;
	hash_flags |= HASH_SHARED_MEM;

362
	/* look it up in the shmem index */
363 364 365 366
	location =
		ShmemInitStruct(name, my_log2(max_size) + sizeof(HHDR), &found);

	/*
367
	 * shmem index is corrupted.	Let someone else give the error
368 369 370
	 * message since they have more information
	 */
	if (location == NULL)
371
		return 0;
372 373 374 375 376 377 378 379 380 381 382 383 384 385

	/*
	 * it already exists, attach to it rather than allocate and initialize
	 * new space
	 */
	if (found)
		hash_flags |= HASH_ATTACH;

	/* these structures were allocated or bound in ShmemInitStruct */
	/* control information and parameters */
	infoP->hctl = (long *) location;
	/* directory for hash lookup */
	infoP->dir = (long *) (location + sizeof(HHDR));

386
	return hash_create(init_size, infoP, hash_flags);;
387 388 389 390 391 392
}

/*
 * ShmemPIDLookup -- lookup process data structure using process id
 *
 * Returns: TRUE if no error.  locationPtr is initialized if PID is
393
 *		found in the shmem index.
394 395
 *
 * NOTES:
396 397
 *		only information about success or failure is the value of
 *		locationPtr.
398 399
 */
bool
400
ShmemPIDLookup(int pid, SHMEM_OFFSET *locationPtr)
401
{
402
	ShmemIndexEnt *result,
403 404
				item;
	bool		found;
405

406 407
	Assert(ShmemIndex);
	MemSet(item.key, 0, SHMEM_INDEX_KEYSIZE);
408 409
	sprintf(item.key, "PID %d", pid);

410 411 412
	SpinAcquire(ShmemIndexLock);
	result = (ShmemIndexEnt *)
		hash_search(ShmemIndex, (char *) &item, HASH_ENTER, &found);
413 414 415 416

	if (!result)
	{

417 418
		SpinRelease(ShmemIndexLock);
		elog(ERROR, "ShmemInitPID: ShmemIndex corrupted");
419
		return FALSE;
420 421 422 423 424 425 426 427

	}

	if (found)
		*locationPtr = result->location;
	else
		result->location = *locationPtr;

428
	SpinRelease(ShmemIndexLock);
429
	return TRUE;
430 431 432
}

/*
433
 * ShmemPIDDestroy -- destroy shmem index entry for process
434
 *		using process id
435 436
 *
 * Returns: offset of the process struct in shared memory or
437
 *		INVALID_OFFSET if not found.
438
 *
439
 * Side Effect: removes the entry from the shmem index
440 441 442 443
 */
SHMEM_OFFSET
ShmemPIDDestroy(int pid)
{
444
	ShmemIndexEnt *result,
445 446 447
				item;
	bool		found;
	SHMEM_OFFSET location = 0;
448

449
	Assert(ShmemIndex);
450

451
	MemSet(item.key, 0, SHMEM_INDEX_KEYSIZE);
452 453
	sprintf(item.key, "PID %d", pid);

454 455 456
	SpinAcquire(ShmemIndexLock);
	result = (ShmemIndexEnt *)
		hash_search(ShmemIndex, (char *) &item, HASH_REMOVE, &found);
457 458 459

	if (found)
		location = result->location;
460
	SpinRelease(ShmemIndexLock);
461 462 463 464

	if (!result)
	{

465
		elog(ERROR, "ShmemPIDDestroy: PID table corrupted");
466
		return INVALID_OFFSET;
467 468 469 470

	}

	if (found)
471
		return location;
472
	else
473
		return INVALID_OFFSET;
474 475 476 477
}

/*
 * ShmemInitStruct -- Create/attach to a structure in shared
478
 *		memory.
479
 *
480 481 482 483 484
 *	This is called during initialization to find or allocate
 *		a data structure in shared memory.	If no other processes
 *		have created the structure, this routine allocates space
 *		for it.  If it exists already, a pointer to the existing
 *		table is returned.
485
 *
486
 *	Returns: real pointer to the object.  FoundPtr is TRUE if
487
 *		the object is already in the shmem index (hence, already
488
 *		initialized).
489
 */
490
long *
491
ShmemInitStruct(char *name, unsigned long size, bool *foundPtr)
492
{
493
	ShmemIndexEnt *result,
494 495
				item;
	long	   *structPtr;
496

497
	strncpy(item.key, name, SHMEM_INDEX_KEYSIZE);
498 499
	item.location = BAD_LOCATION;

500
	SpinAcquire(ShmemIndexLock);
B
Bruce Momjian 已提交
501

502
	if (!ShmemIndex)
503
	{
B
Bruce Momjian 已提交
504
#ifdef USE_ASSERT_CHECKING
505
		char	   *strname = "ShmemIndex";
506

507
#endif
508

509
		/*
510
		 * If the shmem index doesnt exist, we fake it.
511
		 *
512
		 * If we are creating the first shmem index, then let shmemalloc()
513
		 * allocate the space for a new HTAB.  Otherwise, find the old one
514 515
		 * and return that.  Notice that the ShmemIndexLock is held until
		 * the shmem index has been completely initialized.
516 517 518 519 520 521 522
		 */
		Assert(!strcmp(name, strname));
		if (ShmemBootstrap)
		{
			/* in POSTMASTER/Single process */

			*foundPtr = FALSE;
523
			return (long *) ShmemAlloc(size);
524 525 526
		}
		else
		{
527
			Assert(ShmemIndexOffset);
528 529

			*foundPtr = TRUE;
530
			return (long *) MAKE_PTR(*ShmemIndexOffset);
531 532 533
		}


534
	}
535 536
	else
	{
537 538 539
		/* look it up in the shmem index */
		result = (ShmemIndexEnt *)
			hash_search(ShmemIndex, (char *) &item, HASH_ENTER, foundPtr);
540 541 542 543
	}

	if (!result)
	{
544
		SpinRelease(ShmemIndexLock);
545

546
		elog(ERROR, "ShmemInitStruct: Shmem Index corrupted");
547
		return NULL;
548

549
	}
550 551
	else if (*foundPtr)
	{
552

553
		/*
554
		 * Structure is in the shmem index so someone else has allocated
555 556 557 558 559
		 * it already.	The size better be the same as the size we are
		 * trying to initialize to or there is a name conflict (or worse).
		 */
		if (result->size != size)
		{
560
			SpinRelease(ShmemIndexLock);
561

562
			elog(NOTICE, "ShmemInitStruct: ShmemIndex entry size is wrong");
563
			/* let caller print its message too */
564
			return NULL;
565 566 567 568 569 570 571 572 573 574
		}
		structPtr = (long *) MAKE_PTR(result->location);
	}
	else
	{
		/* It isn't in the table yet. allocate and initialize it */
		structPtr = ShmemAlloc((long) size);
		if (!structPtr)
		{
			/* out of memory */
575 576 577
			Assert(ShmemIndex);
			hash_search(ShmemIndex, (char *) &item, HASH_REMOVE, foundPtr);
			SpinRelease(ShmemIndexLock);
578 579 580 581
			*foundPtr = FALSE;

			elog(NOTICE, "ShmemInitStruct: cannot allocate '%s'",
				 name);
582
			return NULL;
583 584 585 586 587 588
		}
		result->size = size;
		result->location = MAKE_OFFSET(structPtr);
	}
	Assert(ShmemIsValid((unsigned long) structPtr));

589
	SpinRelease(ShmemIndexLock);
590
	return structPtr;
591 592
}

593 594 595
/*
 * TransactionIdIsInProgress -- is given transaction running by some backend
 *
596
 * Strange place for this func, but we have to lookup process data structures
597 598 599
 * for all running backends. - vadim 11/26/96
 */
bool
600
TransactionIdIsInProgress(TransactionId xid)
601
{
602
	ShmemIndexEnt *result;
603
	PROC	   *proc;
604

605
	Assert(ShmemIndex);
606

607
	SpinAcquire(ShmemIndexLock);
608 609

	hash_seq((HTAB *) NULL);
610
	while ((result = (ShmemIndexEnt *) hash_seq(ShmemIndex)) != NULL)
611
	{
612
		if (result == (ShmemIndexEnt *) TRUE)
613
		{
614
			SpinRelease(ShmemIndexLock);
615
			return false;
616 617 618 619 620 621 622
		}
		if (result->location == INVALID_OFFSET ||
			strncmp(result->key, "PID ", 4) != 0)
			continue;
		proc = (PROC *) MAKE_PTR(result->location);
		if (proc->xid == xid)
		{
623
			SpinRelease(ShmemIndexLock);
624
			return true;
625
		}
626
	}
627

628 629
	SpinRelease(ShmemIndexLock);
	elog(ERROR, "TransactionIdIsInProgress: ShmemIndex corrupted");
630
	return false;
631
}
632 633 634 635 636 637 638

/*
 * GetSnapshotData -- returns information about running transactions.
 *
 * Yet another strange func for this place...	- vadim 07/21/98
 */
Snapshot
639
GetSnapshotData(bool serializable)
640
{
641 642 643 644
	Snapshot	snapshot = (Snapshot) malloc(sizeof(SnapshotData));
	ShmemIndexEnt *result;
	PROC	   *proc;
	TransactionId cid = GetCurrentTransactionId();
V
Vadim B. Mikheev 已提交
645
	uint32		count = 0;
V
Vadim B. Mikheev 已提交
646
	uint32		have = 32;
647 648

	Assert(ShmemIndex);
649

V
Vadim B. Mikheev 已提交
650
	snapshot->xip = (TransactionId *) malloc(have * sizeof(TransactionId));
651
	snapshot->xmin = cid;
652 653 654 655
	if (serializable)
		snapshot->xmax = cid;
	else
		ReadNewTransactionId(&(snapshot->xmax));
656 657 658 659 660 661 662 663 664 665 666

	SpinAcquire(ShmemIndexLock);

	hash_seq((HTAB *) NULL);
	while ((result = (ShmemIndexEnt *) hash_seq(ShmemIndex)) != NULL)
	{
		if (result == (ShmemIndexEnt *) TRUE)
		{
			if (MyProc->xmin == InvalidTransactionId)
				MyProc->xmin = snapshot->xmin;
			SpinRelease(ShmemIndexLock);
V
Vadim B. Mikheev 已提交
667
			snapshot->xcnt = count;
668
			return snapshot;
669 670 671 672 673
		}
		if (result->location == INVALID_OFFSET ||
			strncmp(result->key, "PID ", 4) != 0)
			continue;
		proc = (PROC *) MAKE_PTR(result->location);
V
Vadim B. Mikheev 已提交
674
		if (proc == MyProc || proc->xid < FirstTransactionId)
675 676 677 678 679
			continue;
		if (proc->xid < snapshot->xmin)
			snapshot->xmin = proc->xid;
		else if (proc->xid > snapshot->xmax)
			snapshot->xmax = proc->xid;
V
Vadim B. Mikheev 已提交
680
		if (have == 0)
681
		{
682
			snapshot->xip = (TransactionId *) realloc(snapshot->xip,
V
Vadim B. Mikheev 已提交
683
								   (count + 32) * sizeof(TransactionId));
V
Vadim B. Mikheev 已提交
684
			have = 32;
685 686
		}
		snapshot->xip[count] = proc->xid;
V
Vadim B. Mikheev 已提交
687
		have--;
688 689 690 691 692 693 694
		count++;
	}

	SpinRelease(ShmemIndexLock);
	free(snapshot->xip);
	free(snapshot);
	elog(ERROR, "GetSnapshotData: ShmemIndex corrupted");
695
	return NULL;
696
}