/*-------------------------------------------------------------------------
 *
 * ipc.c
 *	  POSTGRES inter-process communication definitions.
 *
 * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $Header: /cvsroot/pgsql/src/backend/storage/ipc/ipc.c,v 1.58 2000/12/30 01:20:55 tgl Exp $
 *
 * NOTES
 *
 *	  Currently, semaphores are used (my understanding anyway) in two
 *	  different ways:
 *		1. as mutexes on machines that don't have test-and-set (eg.
 *		   mips R3000).
 *		2. for putting processes to sleep when waiting on a lock
 *		   and waking them up when the lock is free.
 *	  The number of semaphores in (1) is fixed and those are shared
 *	  among all backends. In (2), there is 1 semaphore per process and those
 *	  are not shared with anyone else.
 *														  -ay 4/95
 *
 *-------------------------------------------------------------------------
 */
28 29
#include "postgres.h"

30 31 32
#include <sys/types.h>
#include <sys/file.h>
#include <errno.h>
33
#include <signal.h>
34
#include <unistd.h>
35 36

#include "storage/ipc.h"
37
#include "storage/s_lock.h"
38
/* In Ultrix, sem.h and shm.h must be included AFTER ipc.h */
39
#ifdef HAVE_SYS_SEM_H
40
#include <sys/sem.h>
41 42
#endif
#ifdef HAVE_SYS_SHM_H
43
#include <sys/shm.h>
44 45 46 47
#endif
#ifdef HAVE_KERNEL_OS_H
#include <kernel/OS.h>
#endif
48

B
Bruce Momjian 已提交
49
#if defined(solaris_sparc)
50 51 52
#include <sys/ipc.h>
#endif

53 54 55 56 57 58 59 60
#if defined(__darwin__)
#include "port/darwin/sem.h"
#endif

#include "miscadmin.h"
#include "utils/memutils.h"
#include "libpq/libpq.h"

61

62 63 64 65 66
/*
 * This flag is set during proc_exit() to change elog()'s behavior,
 * so that an elog() from an on_proc_exit routine cannot get us out
 * of the exit procedure.  We do NOT want to go back to the idle loop...
 */
67
bool		proc_exit_inprogress = false;
68

69 70 71 72 73 74 75 76 77 78
static IpcSemaphoreId InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey,
						   int numSems, int permission,
						   int semStartValue, bool removeOnExit);
static void CallbackSemaphoreKill(int status, Datum semId);
static void *InternalIpcMemoryCreate(IpcMemoryKey memKey, uint32 size,
									 int permission);
static void IpcMemoryDetach(int status, Datum shmaddr);
static void IpcMemoryDelete(int status, Datum shmId);
static void *PrivateMemoryCreate(uint32 size);
static void PrivateMemoryDelete(int status, Datum memaddr);
79

80

81
/* ----------------------------------------------------------------
82
 *						exit() handling stuff
83 84 85 86 87 88 89 90 91 92 93
 *
 * These functions are in generally the same spirit as atexit(2),
 * but provide some additional features we need --- in particular,
 * we want to register callbacks to invoke when we are disconnecting
 * from a broken shared-memory context but not exiting the postmaster.
 *
 * Callback functions can take zero, one, or two args: the first passed
 * arg is the integer exitcode, the second is the Datum supplied when
 * the callback was registered.
 *
 * XXX these functions probably ought to live in some other module.
94 95 96 97 98
 * ----------------------------------------------------------------
 */

#define MAX_ON_EXITS 20

99 100
static struct ONEXIT
{
101
	void		(*function) ();
102
	Datum		arg;
103 104
}			on_proc_exit_list[MAX_ON_EXITS],
			on_shmem_exit_list[MAX_ON_EXITS];
105

106 107
static int	on_proc_exit_index,
			on_shmem_exit_index;
108 109 110


/* ----------------------------------------------------------------
111
 *		proc_exit
112
 *
113 114 115 116
 *		this function calls all the callbacks registered
 *		for it (to free resources) and then calls exit.
 *		This should be the only function to call exit().
 *		-cim 2/6/90
117 118 119
 * ----------------------------------------------------------------
 */
void
120
proc_exit(int code)
121
{
122

123
	/*
124 125
	 * Once we set this flag, we are committed to exit.  Any elog() will
	 * NOT send control back to the main loop, but right back here.
M
 
Marc G. Fournier 已提交
126
	 */
127
	proc_exit_inprogress = true;
B
Bruce Momjian 已提交
128

129 130
	if (DebugLvl > 1)
		elog(DEBUG, "proc_exit(%d)", code);
131

132 133
	/* do our shared memory exits first */
	shmem_exit(code);
134

135 136
	/* ----------------
	 *	call all the callbacks registered before calling exit().
137 138 139 140 141 142
	 *
	 *	Note that since we decrement on_proc_exit_index each time,
	 *	if a callback calls elog(ERROR) or elog(FATAL) then it won't
	 *	be invoked again when control comes back here (nor will the
	 *	previously-completed callbacks).  So, an infinite loop
	 *	should not be possible.
143 144
	 * ----------------
	 */
145 146
	while (--on_proc_exit_index >= 0)
		(*on_proc_exit_list[on_proc_exit_index].function) (code,
147
							  on_proc_exit_list[on_proc_exit_index].arg);
148

149 150
	if (DebugLvl > 1)
		elog(DEBUG, "exit(%d)", code);
151
	exit(code);
152 153 154
}

/* ------------------
155
 * Run all of the on_shmem_exit routines --- but don't actually exit.
156
 * This is used by the postmaster to re-initialize shared memory and
157
 * semaphores after a backend dies horribly.
158 159 160
 * ------------------
 */
void
161
shmem_exit(int code)
162
{
163 164
	if (DebugLvl > 1)
		elog(DEBUG, "shmem_exit(%d)", code);
M
 
Marc G. Fournier 已提交
165

166
	/* ----------------
167 168 169 170
	 *	call all the registered callbacks.
	 *
	 *	As with proc_exit(), we remove each callback from the list
	 *	before calling it, to avoid infinite loop in case of error.
171 172
	 * ----------------
	 */
173 174
	while (--on_shmem_exit_index >= 0)
		(*on_shmem_exit_list[on_shmem_exit_index].function) (code,
175
							on_shmem_exit_list[on_shmem_exit_index].arg);
176 177 178

	on_shmem_exit_index = 0;
}
179

180 181 182 183 184 185 186
/* ----------------------------------------------------------------
 *		on_proc_exit
 *
 *		this function adds a callback function to the list of
 *		functions invoked by proc_exit().	-cim 2/6/90
 * ----------------------------------------------------------------
 */
187
void
188
on_proc_exit(void (*function) (), Datum arg)
189 190
{
	if (on_proc_exit_index >= MAX_ON_EXITS)
191
		elog(FATAL, "Out of on_proc_exit slots");
192 193 194 195 196

	on_proc_exit_list[on_proc_exit_index].function = function;
	on_proc_exit_list[on_proc_exit_index].arg = arg;

	++on_proc_exit_index;
197 198 199
}

/* ----------------------------------------------------------------
200
 *		on_shmem_exit
201
 *
202
 *		this function adds a callback function to the list of
203
 *		functions invoked by shmem_exit().	-cim 2/6/90
204 205
 * ----------------------------------------------------------------
 */
206
void
207
on_shmem_exit(void (*function) (), Datum arg)
208
{
209
	if (on_shmem_exit_index >= MAX_ON_EXITS)
210
		elog(FATAL, "Out of on_shmem_exit slots");
211

212 213
	on_shmem_exit_list[on_shmem_exit_index].function = function;
	on_shmem_exit_list[on_shmem_exit_index].arg = arg;
214

215
	++on_shmem_exit_index;
216 217
}

218
/* ----------------------------------------------------------------
219
 *		on_exit_reset
220
 *
221 222 223 224
 *		this function clears all on_proc_exit() and on_shmem_exit()
 *		registered functions.  This is used just after forking a backend,
 *		so that the backend doesn't believe it should call the postmaster's
 *		on-exit routines when it exits...
225 226 227
 * ----------------------------------------------------------------
 */
void
228
on_exit_reset(void)
229
{
230
	on_shmem_exit_index = 0;
231
	on_proc_exit_index = 0;
232 233
}

234

235 236
/* ----------------------------------------------------------------
 *						Semaphore support
237
 *
238 239 240
 * These routines represent a fairly thin layer on top of SysV semaphore
 * functionality.
 * ----------------------------------------------------------------
241 242
 */

243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259
/* ----------------------------------------------------------------
 *	InternalIpcSemaphoreCreate(semKey, numSems, permission,
 *							   semStartValue, removeOnExit)
 *
 * Attempt to create a new semaphore set with the specified key.
 * Will fail (return -1) if such a set already exists.
 * On success, every semaphore in the set is initialized to semStartValue
 * and, if removeOnExit is true, a callback is registered with
 * on_shmem_exit() so that the set is deleted when shmem_exit() runs.
 *
 * If we fail with a failure code other than collision-with-existing-set,
 * print out an error and abort.  Other types of errors are not recoverable.
 * ----------------------------------------------------------------
 */
static IpcSemaphoreId
InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey,
						   int numSems, int permission,
						   int semStartValue, bool removeOnExit)
{
	int			semId;
	int			i;
	int			save_errno;
	u_short		array[IPC_NMAXSEM];
	union semun semun;

	Assert(numSems > 0 && numSems <= IPC_NMAXSEM);

	semId = semget(semKey, numSems, IPC_CREAT | IPC_EXCL | permission);

	if (semId < 0)
	{
		/*
		 * Capture errno right away: the fprintf() calls below are allowed
		 * to change it, and we still need to test it afterwards.
		 */
		save_errno = errno;

		/*
		 * Fail quietly if error indicates a collision with existing set.
		 * One would expect EEXIST, given that we said IPC_EXCL, but perhaps
		 * we could get a permission violation instead?  Also, EIDRM might
		 * occur if an old set is slated for destruction but not gone yet.
		 */
		if (save_errno == EEXIST || save_errno == EACCES
#ifdef EIDRM
			|| save_errno == EIDRM
#endif
			)
			return -1;

		/*
		 * Else complain and abort
		 */
		fprintf(stderr, "IpcSemaphoreCreate: semget(key=%d, num=%d, 0%o) failed: %s\n",
				(int) semKey, numSems, (IPC_CREAT|IPC_EXCL|permission),
				strerror(save_errno));

		if (save_errno == ENOSPC)
			fprintf(stderr,
					"\nThis error does *not* mean that you have run out of disk space.\n\n"
					"It occurs either because system limit for the maximum number of\n"
					"semaphore sets (SEMMNI), or the system wide maximum number of\n"
					"semaphores (SEMMNS), would be exceeded.  You need to raise the\n"
					"respective kernel parameter.  Look into the PostgreSQL documentation\n"
					"for details.\n\n");

		proc_exit(1);
	}

	/* Initialize new semas to specified start value */
	for (i = 0; i < numSems; i++)
		array[i] = semStartValue;
	semun.array = array;
	if (semctl(semId, 0, SETALL, semun) < 0)
	{
		/* as above, keep errno safe from the reporting calls */
		save_errno = errno;

		fprintf(stderr, "IpcSemaphoreCreate: semctl(id=%d, 0, SETALL, ...) failed: %s\n",
				semId, strerror(save_errno));

		if (save_errno == ERANGE)
			fprintf(stderr,
					"You possibly need to raise your kernel's SEMVMX value to be at least\n"
					"%d.  Look into the PostgreSQL documentation for details.\n",
					semStartValue);

		/* no exit callback is registered yet, so remove the set by hand */
		IpcSemaphoreKill(semId);
		proc_exit(1);
	}

	/* Register on-exit routine to delete the new set */
	if (removeOnExit)
		on_shmem_exit(CallbackSemaphoreKill, Int32GetDatum(semId));

	return semId;
}

/****************************************************************************/
330
/*	 IpcSemaphoreKill(semId)	- removes a semaphore set					*/
331
/*																			*/
332 333
/****************************************************************************/
void
334
IpcSemaphoreKill(IpcSemaphoreId semId)
335
{
336
	union semun semun;
337

338
	semun.val = 0;		/* unused, but keep compiler quiet */
339

340 341
	if (semctl(semId, 0, IPC_RMID, semun) < 0)
		fprintf(stderr, "IpcSemaphoreKill: semctl(%d, 0, IPC_RMID, ...) failed: %s\n",
342
				semId, strerror(errno));
343 344 345
	/* We used to report a failure via elog(NOTICE), but that's pretty
	 * pointless considering any client has long since disconnected ...
	 */
346
}
347

348
/****************************************************************************/
349 350
/*	 CallbackSemaphoreKill(status, semId)									*/
/*	(called as an on_shmem_exit callback, hence funny argument list)		*/
351
/****************************************************************************/
352 353
static void
CallbackSemaphoreKill(int status, Datum semId)
354
{
355
	IpcSemaphoreKill(DatumGetInt32(semId));
356 357 358
}

/****************************************************************************/
359
/*	 IpcSemaphoreLock(semId, sem) - locks a semaphore						*/
360 361
/****************************************************************************/
void
362
IpcSemaphoreLock(IpcSemaphoreId semId, int sem)
363
{
364 365
	int			errStatus;
	struct sembuf sops;
366

367
	sops.sem_op = -1;			/* decrement */
368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383
	sops.sem_flg = 0;
	sops.sem_num = sem;

	/* ----------------
	 *	Note: if errStatus is -1 and errno == EINTR then it means we
	 *		  returned from the operation prematurely because we were
	 *		  sent a signal.  So we try and lock the semaphore again.
	 * ----------------
	 */
	do
	{
		errStatus = semop(semId, &sops, 1);
	} while (errStatus == -1 && errno == EINTR);

	if (errStatus == -1)
	{
384 385
        fprintf(stderr, "IpcSemaphoreLock: semop(id=%d) failed: %s\n",
				semId, strerror(errno));
386
		proc_exit(255);
387
	}
388 389 390
}

/****************************************************************************/
391
/*	 IpcSemaphoreUnlock(semId, sem)		- unlocks a semaphore				*/
392 393
/****************************************************************************/
void
394
IpcSemaphoreUnlock(IpcSemaphoreId semId, int sem)
395
{
396 397
	int			errStatus;
	struct sembuf sops;
398

399
	sops.sem_op = 1;			/* increment */
400 401 402 403 404 405 406
	sops.sem_flg = 0;
	sops.sem_num = sem;


	/* ----------------
	 *	Note: if errStatus is -1 and errno == EINTR then it means we
	 *		  returned from the operation prematurely because we were
407 408
	 *		  sent a signal.  So we try and unlock the semaphore again.
	 *		  Not clear this can really happen, but might as well cope.
409 410 411 412 413 414 415 416 417
	 * ----------------
	 */
	do
	{
		errStatus = semop(semId, &sops, 1);
	} while (errStatus == -1 && errno == EINTR);

	if (errStatus == -1)
	{
418 419
		fprintf(stderr, "IpcSemaphoreUnlock: semop(id=%d) failed: %s\n",
				semId, strerror(errno));
420
		proc_exit(255);
421
	}
422 423
}

424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469
/****************************************************************************/
/*	 IpcSemaphoreTryLock(semId, sem)	- conditionally locks a semaphore	*/
/* Lock the semaphore if it's free, but don't block.						*/
/* Returns true if the lock was obtained, false if it was not available.	*/
/****************************************************************************/
bool
IpcSemaphoreTryLock(IpcSemaphoreId semId, int sem)
{
	struct sembuf attempt;
	int			rc;

	attempt.sem_num = sem;
	attempt.sem_op = -1;			/* decrement */
	attempt.sem_flg = IPC_NOWAIT;	/* but don't block */

	/*
	 * A result of -1 with errno == EINTR means we returned from the
	 * operation prematurely because we were sent a signal; in that case
	 * simply try to lock the semaphore again.
	 */
	for (;;)
	{
		rc = semop(semId, &attempt, 1);
		if (rc != -1 || errno != EINTR)
			break;
	}

	if (rc != -1)
		return true;			/* got the lock */

	/* Expect EAGAIN or EWOULDBLOCK (platform-dependent) */
#ifdef EAGAIN
	if (errno == EAGAIN)
		return false;			/* failed to lock it */
#endif
#if defined(EWOULDBLOCK) && (!defined(EAGAIN) || (EWOULDBLOCK != EAGAIN))
	if (errno == EWOULDBLOCK)
		return false;			/* failed to lock it */
#endif

	/* Otherwise we got trouble */
	fprintf(stderr, "IpcSemaphoreTryLock: semop(id=%d) failed: %s\n",
			semId, strerror(errno));
	proc_exit(255);

	return true;				/* only reached if proc_exit() came back */
}

/* Get the current value (semval) of the semaphore */
470
int
471
IpcSemaphoreGetValue(IpcSemaphoreId semId, int sem)
472
{
473
	union semun dummy;			/* for Solaris */
474
	dummy.val = 0;		/* unused */
475

476
	return semctl(semId, sem, GETVAL, dummy);
477 478
}

479 480 481
/*
 * Get the PID of the last process to do semop() on semaphore number "sem"
 * of set "semId".  The result of semctl(GETPID) is returned as-is,
 * including any negative failure indication.
 */
static pid_t
IpcSemaphoreGetLastPID(IpcSemaphoreId semId, int sem)
{
	union semun unused_arg;		/* for Solaris */
	pid_t		lastPID;

	unused_arg.val = 0;			/* unused */

	lastPID = semctl(semId, sem, GETPID, unused_arg);

	return lastPID;
}


490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512
/* ----------------------------------------------------------------
 *						Shared memory support
 *
 * These routines represent a fairly thin layer on top of SysV shared
 * memory functionality.
 * ----------------------------------------------------------------
 */

/* ----------------------------------------------------------------
 *	InternalIpcMemoryCreate(memKey, size, permission)
 *
 * Attempt to create a new shared memory segment with the specified key.
 * Will fail (return NULL) if such a segment already exists.  If successful,
 * attach the segment to the current process and return its attached address.
 * On success, callbacks are registered with on_shmem_exit() so that the
 * segment is detached and then deleted when shmem_exit() runs.
 *
 * If we fail with a failure code other than collision-with-existing-segment,
 * print out an error and abort.  Other types of errors are not recoverable.
 * ----------------------------------------------------------------
 */
static void *
InternalIpcMemoryCreate(IpcMemoryKey memKey, uint32 size, int permission)
{
	IpcMemoryId shmid;
	void	   *memAddress;

	shmid = shmget(memKey, size, IPC_CREAT | IPC_EXCL | permission);

	if (shmid < 0)
	{
		/*
		 * Capture errno right away: the fprintf() calls below are allowed
		 * to change it, and we still need to test it afterwards.
		 */
		int			save_errno = errno;

		/*
		 * Fail quietly if error indicates a collision with existing segment.
		 * One would expect EEXIST, given that we said IPC_EXCL, but perhaps
		 * we could get a permission violation instead?  Also, EIDRM might
		 * occur if an old seg is slated for destruction but not gone yet.
		 */
		if (save_errno == EEXIST || save_errno == EACCES
#ifdef EIDRM
			|| save_errno == EIDRM
#endif
			)
			return NULL;

		/*
		 * Else complain and abort
		 */
		fprintf(stderr, "IpcMemoryCreate: shmget(key=%d, size=%u, 0%o) failed: %s\n",
				(int) memKey, size, (IPC_CREAT | IPC_EXCL | permission),
				strerror(save_errno));

		if (save_errno == EINVAL)
			fprintf(stderr,
					"\nThis error can be caused by one of three things:\n\n"
					"1. The maximum size for shared memory segments on your system was\n"
					"   exceeded.  You need to raise the SHMMAX parameter in your kernel\n"
					"   to be at least %u bytes.\n\n"
					"2. The requested shared memory segment was too small for your system.\n"
					"   You need to lower the SHMMIN parameter in your kernel.\n\n"
					"3. The requested shared memory segment already exists but is of the\n"
					"   wrong size.  This is most likely the case if an old version of\n"
					"   PostgreSQL crashed and didn't clean up.  The `ipcclean' utility\n"
					"   can be used to remedy this.\n\n"
					"The PostgreSQL Administrator's Guide contains more information about\n"
					"shared memory configuration.\n\n",
					size);

		else if (save_errno == ENOSPC)
			fprintf(stderr,
					"\nThis error does *not* mean that you have run out of disk space.\n\n"
					"It occurs either if all available shared memory ids have been taken,\n"
					"in which case you need to raise the SHMMNI parameter in your kernel,\n"
					"or because the system's overall limit for shared memory has been\n"
					"reached.  The PostgreSQL Administrator's Guide contains more\n"
					"information about shared memory configuration.\n\n");

		proc_exit(1);
	}

	/*
	 * Register on-exit routine to delete the new segment.  This is done
	 * before attaching, so a failed shmat() below still gets the segment
	 * removed on the way out.
	 */
	on_shmem_exit(IpcMemoryDelete, Int32GetDatum(shmid));

	/* OK, should be able to attach to the segment */
	memAddress = shmat(shmid, 0, 0);

	if (memAddress == (void *) -1)
	{
		fprintf(stderr, "IpcMemoryCreate: shmat(id=%d) failed: %s\n",
				shmid, strerror(errno));
		proc_exit(1);
	}

	/*
	 * Register on-exit routine to detach new segment before deleting.
	 * shmem_exit() runs callbacks last-registered-first, so the detach
	 * happens ahead of the delete registered above.
	 */
	on_shmem_exit(IpcMemoryDetach, PointerGetDatum(memAddress));

	return memAddress;
}

/****************************************************************************/
588
/*	IpcMemoryDetach(status, shmaddr)	removes a shared memory segment		*/
589 590
/*										from process' address spaceq		*/
/*	(called as an on_shmem_exit callback, hence funny argument list)		*/
591
/****************************************************************************/
592
static void
593
IpcMemoryDetach(int status, Datum shmaddr)
594
{
595 596 597 598 599 600
	if (shmdt(DatumGetPointer(shmaddr)) < 0)
		fprintf(stderr, "IpcMemoryDetach: shmdt(%p) failed: %s\n",
				DatumGetPointer(shmaddr), strerror(errno));
	/* We used to report a failure via elog(NOTICE), but that's pretty
	 * pointless considering any client has long since disconnected ...
	 */
601 602 603
}

/****************************************************************************/
604 605
/*	IpcMemoryDelete(status, shmId)		deletes a shared memory segment		*/
/*	(called as an on_shmem_exit callback, hence funny argument list)		*/
606
/****************************************************************************/
607 608
static void
IpcMemoryDelete(int status, Datum shmId)
609
{
610 611 612 613 614 615 616
	if (shmctl(DatumGetInt32(shmId), IPC_RMID, (struct shmid_ds *) NULL) < 0)
		fprintf(stderr, "IpcMemoryDelete: shmctl(%d, %d, 0) failed: %s\n",
				DatumGetInt32(shmId), IPC_RMID, strerror(errno));
	/* We used to report a failure via elog(NOTICE), but that's pretty
	 * pointless considering any client has long since disconnected ...
	 */
}
617

618 619 620 621 622 623 624 625
/* ----------------------------------------------------------------
 *						private memory support
 *
 * Rather than allocating shmem segments with IPC_PRIVATE key, we
 * just malloc() the requested amount of space.  This code emulates
 * the needed shmem functions.
 * ----------------------------------------------------------------
 */
626

627 628 629 630 631 632 633
/*
 * Allocate "size" bytes of zero-filled process-local memory with malloc()
 * and register an on_shmem_exit callback that frees it again.
 * An allocation failure is reported on stderr and ends the process via
 * proc_exit(1).
 */
static void *
PrivateMemoryCreate(uint32 size)
{
	void	   *block = malloc(size);

	if (block == NULL)
	{
		fprintf(stderr, "PrivateMemoryCreate: malloc(%u) failed\n", size);
		proc_exit(1);
	}

	MemSet(block, 0, size);		/* keep Purify quiet */

	/* Register on-exit routine to release storage */
	on_shmem_exit(PrivateMemoryDelete, PointerGetDatum(block));

	return block;
}

646 647
/*
 * on_shmem_exit callback: free() the block whose address is carried in
 * the Datum.  The exit status argument is not used.
 */
static void
PrivateMemoryDelete(int status, Datum memaddr)
{
	void	   *block = DatumGetPointer(memaddr);

	free(block);
}
651

652

653
/* ------------------
654 655 656 657
 *				Routines to assign keys for new IPC objects
 *
 * The idea here is to detect and re-use keys that may have been assigned
 * by a crashed postmaster or backend.
658 659
 * ------------------
 */
660

661 662
/* Most recently tried key for shared memory segments / semaphore sets */
static IpcMemoryKey NextShmemSegID = 0;
static IpcSemaphoreKey NextSemaID = 0;

/*
 * (Re) initialize key assignment at startup of postmaster or standalone
 * backend, also at postmaster reset.
 *
 * Both key counters are restarted at port * 1000.
 */
void
IpcInitKeyAssignment(int port)
{
	int			firstKey = port * 1000;

	NextShmemSegID = firstKey;
	NextSemaID = firstKey;
}
674

675 676 677 678 679 680
/*
 * Create a shared memory segment of the given size and initialize its
 * standard header.  Dead Postgres segments are recycled if found,
 * but we do not fail upon collision with non-Postgres shmem segments.
 */
PGShmemHeader *
681
IpcMemoryCreate(uint32 size, bool makePrivate, int permission)
682 683 684
{
	void   *memAddress;
	PGShmemHeader *hdr;
685

686 687
	/* Room for a header? */
	Assert(size > MAXALIGN(sizeof(PGShmemHeader)));
688

689 690
	/* Loop till we find a free IPC key */
	for (NextShmemSegID++ ; ; NextShmemSegID++)
691
	{
692 693 694
		IpcMemoryId shmid;

		/* Special case if creating a private segment --- just malloc() it */
695
		if (makePrivate)
696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752
		{
			memAddress = PrivateMemoryCreate(size);
			break;
		}

		/* Try to create new segment */
		memAddress = InternalIpcMemoryCreate(NextShmemSegID, size, permission);
		if (memAddress)
			break;				/* successful create and attach */

		/* See if it looks to be leftover from a dead Postgres process */
		shmid = shmget(NextShmemSegID, sizeof(PGShmemHeader), 0);
		if (shmid < 0)
			continue;			/* failed: must be some other app's */
		memAddress = shmat(shmid, 0, 0);
		if (memAddress == (void *) -1)
			continue;			/* failed: must be some other app's */
		hdr = (PGShmemHeader *) memAddress;
		if (hdr->magic != PGShmemMagic)
		{
			shmdt(memAddress);
			continue;			/* segment belongs to a non-Postgres app */
		}
		/*
		 * If the creator PID is my own PID or does not belong to any
		 * extant process, it's safe to zap it.
		 */
		if (hdr->creatorPID != getpid())
		{
			if (kill(hdr->creatorPID, 0) == 0 ||
				errno != ESRCH)
			{
				shmdt(memAddress);
				continue;		/* segment belongs to a live process */
			}
		}
		/*
		 * The segment appears to be from a dead Postgres process, or
		 * from a previous cycle of life in this same process.  Zap it,
		 * if possible.  This probably shouldn't fail, but if it does,
		 * assume the segment belongs to someone else after all,
		 * and continue quietly.
		 */
		shmdt(memAddress);
		if (shmctl(shmid, IPC_RMID, (struct shmid_ds *) NULL) < 0)
			continue;
		/*
		 * Now try again to create the segment.
		 */
		memAddress = InternalIpcMemoryCreate(NextShmemSegID, size, permission);
		if (memAddress)
			break;				/* successful create and attach */
		/*
		 * Can only get here if some other process managed to create the
		 * same shmem key before we did.  Let him have that one,
		 * loop around to try next key.
		 */
753
	}
754 755 756 757 758 759 760 761 762 763 764 765 766 767
	/*
	 * OK, we created a new segment.  Mark it as created by this process.
	 * The order of assignments here is critical so that another Postgres
	 * process can't see the header as valid but belonging to an invalid
	 * PID!
	 */
	hdr = (PGShmemHeader *) memAddress;
	hdr->creatorPID = getpid();
	hdr->magic = PGShmemMagic;
	/*
	 * Initialize space allocation status for segment.
	 */
	hdr->totalsize = size;
	hdr->freeoffset = MAXALIGN(sizeof(PGShmemHeader));
768

769
	return hdr;
770 771
}

772 773 774 775 776 777 778 779 780
/*
 * Create a semaphore set with the given number of useful semaphores
 * (an additional sema is actually allocated to serve as identifier).
 * Dead Postgres sema sets are recycled if found, but we do not fail
 * upon collision with non-Postgres sema sets.
 */
IpcSemaphoreId
IpcSemaphoreCreate(int numSems, int permission,
				   int semStartValue, bool removeOnExit)
781
{
782 783
	IpcSemaphoreId	semId;
	union semun semun;
784

785 786 787 788 789 790 791 792 793 794 795
	/* Loop till we find a free IPC key */
	for (NextSemaID++ ; ; NextSemaID++)
	{
		pid_t	creatorPID;

		/* Try to create new semaphore set */
		semId = InternalIpcSemaphoreCreate(NextSemaID, numSems+1,
										   permission, semStartValue,
										   removeOnExit);
		if (semId >= 0)
			break;				/* successful create */
796

797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863
		/* See if it looks to be leftover from a dead Postgres process */
		semId = semget(NextSemaID, numSems+1, 0);
		if (semId < 0)
			continue;			/* failed: must be some other app's */
		if (IpcSemaphoreGetValue(semId, numSems) != PGSemaMagic)
			continue;			/* sema belongs to a non-Postgres app */
		/*
		 * If the creator PID is my own PID or does not belong to any
		 * extant process, it's safe to zap it.
		 */
		creatorPID = IpcSemaphoreGetLastPID(semId, numSems);
		if (creatorPID <= 0)
			continue;			/* oops, GETPID failed */
		if (creatorPID != getpid())
		{
			if (kill(creatorPID, 0) == 0 ||
				errno != ESRCH)
				continue;		/* sema belongs to a live process */
		}
		/*
		 * The sema set appears to be from a dead Postgres process, or
		 * from a previous cycle of life in this same process.  Zap it,
		 * if possible.  This probably shouldn't fail, but if it does,
		 * assume the sema set belongs to someone else after all,
		 * and continue quietly.
		 */
		semun.val = 0;			/* unused, but keep compiler quiet */
		if (semctl(semId, 0, IPC_RMID, semun) < 0)
			continue;
		/*
		 * Now try again to create the sema set.
		 */
		semId = InternalIpcSemaphoreCreate(NextSemaID, numSems+1,
										   permission, semStartValue,
										   removeOnExit);
		if (semId >= 0)
			break;				/* successful create */
		/*
		 * Can only get here if some other process managed to create the
		 * same sema key before we did.  Let him have that one,
		 * loop around to try next key.
		 */
	}
	/*
	 * OK, we created a new sema set.  Mark it as created by this process.
	 * We do this by setting the spare semaphore to PGSemaMagic-1 and then
	 * incrementing it with semop().  That leaves it with value PGSemaMagic
	 * and sempid referencing this process.
	 */
	semun.val = PGSemaMagic-1;
	if (semctl(semId, numSems, SETVAL, semun) < 0)
	{
		fprintf(stderr, "IpcSemaphoreCreate: semctl(id=%d, %d, SETVAL, %d) failed: %s\n",
				semId, numSems, PGSemaMagic-1, strerror(errno));

		if (errno == ERANGE)
			fprintf(stderr,
					"You possibly need to raise your kernel's SEMVMX value to be at least\n"
					"%d.  Look into the PostgreSQL documentation for details.\n",
					PGSemaMagic);

		proc_exit(1);
	}
	IpcSemaphoreUnlock(semId, numSems);

	return semId;
}