ipc.c 29.4 KB
Newer Older
1
/*-------------------------------------------------------------------------
 *
 * ipc.c
 *	  POSTGRES inter-process communication definitions.
 *
 * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $Header: /cvsroot/pgsql/src/backend/storage/ipc/ipc.c,v 1.69 2001/09/29 04:02:23 tgl Exp $
 *
 * NOTES
 *
 *	  Currently, semaphores are used (my understanding anyway) in two
 *	  different ways:
 *		1. as mutexes on machines that don't have test-and-set (eg.
 *		   mips R3000).
 *		2. for putting processes to sleep when waiting on a lock
 *		   and waking them up when the lock is free.
 *	  The number of semaphores in (1) is fixed and those are shared
 *	  among all backends. In (2), there is 1 semaphore per process and those
 *	  are not shared with anyone else.
 *														  -ay 4/95
 *
 *-------------------------------------------------------------------------
 */
28 29
#include "postgres.h"

30 31 32
#include <sys/types.h>
#include <sys/file.h>
#include <errno.h>
33
#include <signal.h>
34
#include <unistd.h>
35 36

#include "storage/ipc.h"
37
/* In Ultrix, sem.h and shm.h must be included AFTER ipc.h */
38
#ifdef HAVE_SYS_SEM_H
39
#include <sys/sem.h>
40 41
#endif
#ifdef HAVE_SYS_SHM_H
42
#include <sys/shm.h>
43 44 45 46
#endif
#ifdef HAVE_KERNEL_OS_H
#include <kernel/OS.h>
#endif
47

B
Bruce Momjian 已提交
48
#if defined(solaris_sparc)
49 50 51
#include <sys/ipc.h>
#endif

52 53 54 55 56 57 58 59
#if defined(__darwin__)
#include "port/darwin/sem.h"
#endif

#include "miscadmin.h"
#include "utils/memutils.h"
#include "libpq/libpq.h"

60

61 62 63 64 65
/*
 * This flag is set during proc_exit() to change elog()'s behavior,
 * so that an elog() from an on_proc_exit routine cannot get us out
 * of the exit procedure.  We do NOT want to go back to the idle loop...
 */
bool		proc_exit_inprogress = false;
67

68 69 70 71 72
/* Prototypes for routines private to this file */
static IpcSemaphoreId InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey,
						   int numSems, int permission,
						   int semStartValue, bool removeOnExit);
static void CallbackSemaphoreKill(int status, Datum semId);
static void *InternalIpcMemoryCreate(IpcMemoryKey memKey, uint32 size,
						int permission);
static void IpcMemoryDetach(int status, Datum shmaddr);
static void IpcMemoryDelete(int status, Datum shmId);
static void *PrivateMemoryCreate(uint32 size);
static void PrivateMemoryDelete(int status, Datum memaddr);
78

79

80
/* ----------------------------------------------------------------
81
 *						exit() handling stuff
82 83 84 85 86 87 88 89 90 91 92
 *
 * These functions are in generally the same spirit as atexit(2),
 * but provide some additional features we need --- in particular,
 * we want to register callbacks to invoke when we are disconnecting
 * from a broken shared-memory context but not exiting the postmaster.
 *
 * Callback functions can take zero, one, or two args: the first passed
 * arg is the integer exitcode, the second is the Datum supplied when
 * the callback was registered.
 *
 * XXX these functions probably ought to live in some other module.
93 94 95 96 97
 * ----------------------------------------------------------------
 */

#define MAX_ON_EXITS 20

98 99
static struct ONEXIT
{
100
	void		(*function) ();
101
	Datum		arg;
102
}			on_proc_exit_list[MAX_ON_EXITS],
B
Bruce Momjian 已提交
103

104
			on_shmem_exit_list[MAX_ON_EXITS];
105

106 107
static int	on_proc_exit_index,
			on_shmem_exit_index;
108 109 110


/* ----------------------------------------------------------------
111
 *		proc_exit
112
 *
113 114 115 116
 *		this function calls all the callbacks registered
 *		for it (to free resources) and then calls exit.
 *		This should be the only function to call exit().
 *		-cim 2/6/90
117 118 119
 * ----------------------------------------------------------------
 */
void
120
proc_exit(int code)
121
{
122

123
	/*
124 125
	 * Once we set this flag, we are committed to exit.  Any elog() will
	 * NOT send control back to the main loop, but right back here.
M
 
Marc G. Fournier 已提交
126
	 */
127
	proc_exit_inprogress = true;
B
Bruce Momjian 已提交
128

129
	/*
B
Bruce Momjian 已提交
130 131 132
	 * Forget any pending cancel or die requests; we're doing our best to
	 * close up shop already.  Note that the signal handlers will not set
	 * these flags again, now that proc_exit_inprogress is set.
133
	 */
134
	InterruptPending = false;
135
	ProcDiePending = false;
136 137 138
	QueryCancelPending = false;
	/* And let's just make *sure* we're not interrupted ... */
	ImmediateInterruptOK = false;
139 140
	InterruptHoldoffCount = 1;
	CritSectionCount = 0;
141

142 143
	if (DebugLvl > 1)
		elog(DEBUG, "proc_exit(%d)", code);
144

145 146
	/* do our shared memory exits first */
	shmem_exit(code);
147

148 149
	/*
	 * call all the callbacks registered before calling exit().
150
	 *
151 152 153 154 155
	 * Note that since we decrement on_proc_exit_index each time, if a
	 * callback calls elog(ERROR) or elog(FATAL) then it won't be invoked
	 * again when control comes back here (nor will the
	 * previously-completed callbacks).  So, an infinite loop should not
	 * be possible.
156
	 */
157 158
	while (--on_proc_exit_index >= 0)
		(*on_proc_exit_list[on_proc_exit_index].function) (code,
159
							  on_proc_exit_list[on_proc_exit_index].arg);
160

161 162
	if (DebugLvl > 1)
		elog(DEBUG, "exit(%d)", code);
163
	exit(code);
164 165 166
}

/* ------------------
167
 * Run all of the on_shmem_exit routines --- but don't actually exit.
168
 * This is used by the postmaster to re-initialize shared memory and
169
 * semaphores after a backend dies horribly.
170 171 172
 * ------------------
 */
void
173
shmem_exit(int code)
174
{
175 176
	if (DebugLvl > 1)
		elog(DEBUG, "shmem_exit(%d)", code);
M
 
Marc G. Fournier 已提交
177

178 179
	/*
	 * call all the registered callbacks.
180
	 *
181 182
	 * As with proc_exit(), we remove each callback from the list before
	 * calling it, to avoid infinite loop in case of error.
183
	 */
184 185
	while (--on_shmem_exit_index >= 0)
		(*on_shmem_exit_list[on_shmem_exit_index].function) (code,
186
							on_shmem_exit_list[on_shmem_exit_index].arg);
187 188 189

	on_shmem_exit_index = 0;
}
190

191 192 193 194 195 196 197
/* ----------------------------------------------------------------
 *		on_proc_exit
 *
 *		this function adds a callback function to the list of
 *		functions invoked by proc_exit().	-cim 2/6/90
 * ----------------------------------------------------------------
 */
198
void
B
Bruce Momjian 已提交
199
			on_proc_exit(void (*function) (), Datum arg)
200 201
{
	if (on_proc_exit_index >= MAX_ON_EXITS)
202
		elog(FATAL, "Out of on_proc_exit slots");
203 204 205 206 207

	on_proc_exit_list[on_proc_exit_index].function = function;
	on_proc_exit_list[on_proc_exit_index].arg = arg;

	++on_proc_exit_index;
208 209 210
}

/* ----------------------------------------------------------------
211
 *		on_shmem_exit
212
 *
213
 *		this function adds a callback function to the list of
214
 *		functions invoked by shmem_exit().	-cim 2/6/90
215 216
 * ----------------------------------------------------------------
 */
217
void
B
Bruce Momjian 已提交
218
			on_shmem_exit(void (*function) (), Datum arg)
219
{
220
	if (on_shmem_exit_index >= MAX_ON_EXITS)
221
		elog(FATAL, "Out of on_shmem_exit slots");
222

223 224
	on_shmem_exit_list[on_shmem_exit_index].function = function;
	on_shmem_exit_list[on_shmem_exit_index].arg = arg;
225

226
	++on_shmem_exit_index;
227 228
}

229
/* ----------------------------------------------------------------
230
 *		on_exit_reset
231
 *
232 233 234 235
 *		this function clears all on_proc_exit() and on_shmem_exit()
 *		registered functions.  This is used just after forking a backend,
 *		so that the backend doesn't believe it should call the postmaster's
 *		on-exit routines when it exits...
236 237 238
 * ----------------------------------------------------------------
 */
void
239
on_exit_reset(void)
240
{
241
	on_shmem_exit_index = 0;
242
	on_proc_exit_index = 0;
243 244
}

245

246 247
/* ----------------------------------------------------------------
 *						Semaphore support
 *
 * These routines represent a fairly thin layer on top of SysV semaphore
 * functionality.
 * ----------------------------------------------------------------
 */

254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270
/* ----------------------------------------------------------------
 *	InternalIpcSemaphoreCreate(semKey, numSems, permission,
 *							   semStartValue, removeOnExit)
 *
 * Attempt to create a new semaphore set with the specified key.
 * Will fail (return -1) if such a set already exists.
 * On success, every semaphore in the set is initialized to semStartValue,
 * and, if removeOnExit is true, a callback is registered with
 * on_shmem_exit to delete the semaphore set when shmem_exit is called.
 *
 * If we fail with a failure code other than collision-with-existing-set,
 * print out an error and abort.  Other types of errors are not recoverable.
 * ----------------------------------------------------------------
 */
static IpcSemaphoreId
InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey,
						   int numSems, int permission,
						   int semStartValue, bool removeOnExit)
{
	int			semId;
	int			i;
	u_short		array[IPC_NMAXSEM];
	union semun semun;

	Assert(numSems > 0 && numSems <= IPC_NMAXSEM);

	semId = semget(semKey, numSems, IPC_CREAT | IPC_EXCL | permission);

	if (semId < 0)
	{
		/*
		 * Capture errno right away: the fprintf() calls below are allowed
		 * to change it, and we still need to test it afterwards.
		 */
		int			save_errno = errno;

		/*
		 * Fail quietly if error indicates a collision with existing set.
		 * One would expect EEXIST, given that we said IPC_EXCL, but
		 * perhaps we could get a permission violation instead?  Also,
		 * EIDRM might occur if an old set is slated for destruction but
		 * not gone yet.
		 */
		if (save_errno == EEXIST || save_errno == EACCES
#ifdef EIDRM
			|| save_errno == EIDRM
#endif
			)
			return -1;

		/*
		 * Else complain and abort
		 */
		fprintf(stderr, "IpcSemaphoreCreate: semget(key=%d, num=%d, 0%o) failed: %s\n",
				(int) semKey, numSems, (IPC_CREAT | IPC_EXCL | permission),
				strerror(save_errno));

		if (save_errno == ENOSPC)
			fprintf(stderr,
					"\nThis error does *not* mean that you have run out of disk space.\n\n"
					"It occurs because either the system limit for the maximum number of\n"
					"semaphore sets (SEMMNI), or the system wide maximum number of\n"
					"semaphores (SEMMNS), would be exceeded.  You need to raise the\n"
					"respective kernel parameter.  Look into the PostgreSQL documentation\n"
					"for details.\n\n");

		proc_exit(1);
	}

	/* Initialize new semas to specified start value */
	for (i = 0; i < numSems; i++)
		array[i] = semStartValue;
	semun.array = array;
	if (semctl(semId, 0, SETALL, semun) < 0)
	{
		/* as above, keep errno safe from the reporting calls */
		int			save_errno = errno;

		fprintf(stderr, "IpcSemaphoreCreate: semctl(id=%d, 0, SETALL, ...) failed: %s\n",
				semId, strerror(save_errno));

		if (save_errno == ERANGE)
			fprintf(stderr,
					"You possibly need to raise your kernel's SEMVMX value to be at least\n"
					"%d.  Look into the PostgreSQL documentation for details.\n",
					semStartValue);

		/* don't leave the half-initialized set behind */
		IpcSemaphoreKill(semId);
		proc_exit(1);
	}

	/* Register on-exit routine to delete the new set */
	if (removeOnExit)
		on_shmem_exit(CallbackSemaphoreKill, Int32GetDatum(semId));

	return semId;
}

/****************************************************************************/
344
/*	 IpcSemaphoreKill(semId)	- removes a semaphore set					*/
345
/*																			*/
346 347
/****************************************************************************/
void
348
IpcSemaphoreKill(IpcSemaphoreId semId)
349
{
350
	union semun semun;
351

B
Bruce Momjian 已提交
352
	semun.val = 0;				/* unused, but keep compiler quiet */
353

354 355
	if (semctl(semId, 0, IPC_RMID, semun) < 0)
		fprintf(stderr, "IpcSemaphoreKill: semctl(%d, 0, IPC_RMID, ...) failed: %s\n",
356
				semId, strerror(errno));
B
Bruce Momjian 已提交
357 358 359

	/*
	 * We used to report a failure via elog(NOTICE), but that's pretty
360 361
	 * pointless considering any client has long since disconnected ...
	 */
362
}
363

364
/****************************************************************************/
365 366
/*	 CallbackSemaphoreKill(status, semId)									*/
/*	(called as an on_shmem_exit callback, hence funny argument list)		*/
367
/****************************************************************************/
368 369
static void
CallbackSemaphoreKill(int status, Datum semId)
370
{
371
	IpcSemaphoreKill(DatumGetInt32(semId));
372 373 374
}

/****************************************************************************/
375
/*	 IpcSemaphoreLock(semId, sem) - locks a semaphore						*/
376 377
/****************************************************************************/
void
378
IpcSemaphoreLock(IpcSemaphoreId semId, int sem, bool interruptOK)
379
{
380 381
	int			errStatus;
	struct sembuf sops;
382

383
	sops.sem_op = -1;			/* decrement */
384 385 386
	sops.sem_flg = 0;
	sops.sem_num = sem;

387 388 389 390
	/*
	 * Note: if errStatus is -1 and errno == EINTR then it means we
	 * returned from the operation prematurely because we were sent a
	 * signal.	So we try and lock the semaphore again.
391
	 *
392 393 394 395 396
	 * Each time around the loop, we check for a cancel/die interrupt. We
	 * assume that if such an interrupt comes in while we are waiting, it
	 * will cause the semop() call to exit with errno == EINTR, so that we
	 * will be able to service the interrupt (if not in a critical section
	 * already).
397
	 *
398 399 400
	 * Once we acquire the lock, we do NOT check for an interrupt before
	 * returning.  The caller needs to be able to record ownership of the
	 * lock before any interrupt can be accepted.
401
	 *
402 403 404 405
	 * There is a window of a few instructions between CHECK_FOR_INTERRUPTS
	 * and entering the semop() call.  If a cancel/die interrupt occurs in
	 * that window, we would fail to notice it until after we acquire the
	 * lock (or get another interrupt to escape the semop()).  We can
406
	 * avoid this problem by temporarily setting ImmediateInterruptOK to
407 408 409 410 411 412 413 414 415 416 417
	 * true before we do CHECK_FOR_INTERRUPTS; then, a die() interrupt in
	 * this interval will execute directly.  However, there is a huge
	 * pitfall: there is another window of a few instructions after the
	 * semop() before we are able to reset ImmediateInterruptOK.  If an
	 * interrupt occurs then, we'll lose control, which means that the
	 * lock has been acquired but our caller did not get a chance to
	 * record the fact. Therefore, we only set ImmediateInterruptOK if the
	 * caller tells us it's OK to do so, ie, the caller does not need to
	 * record acquiring the lock.  (This is currently true for lockmanager
	 * locks, since the process that granted us the lock did all the
	 * necessary state updates. It's not true for SysV semaphores used to
418 419
	 * implement LW locks or emulate spinlocks --- but the wait time for
	 * such locks should not be very long, anyway.)
420 421 422
	 */
	do
	{
423 424
		ImmediateInterruptOK = interruptOK;
		CHECK_FOR_INTERRUPTS();
425
		errStatus = semop(semId, &sops, 1);
426
		ImmediateInterruptOK = false;
427 428 429 430
	} while (errStatus == -1 && errno == EINTR);

	if (errStatus == -1)
	{
B
Bruce Momjian 已提交
431
		fprintf(stderr, "IpcSemaphoreLock: semop(id=%d) failed: %s\n",
432
				semId, strerror(errno));
433
		proc_exit(255);
434
	}
435 436 437
}

/****************************************************************************/
438
/*	 IpcSemaphoreUnlock(semId, sem)		- unlocks a semaphore				*/
439 440
/****************************************************************************/
void
441
IpcSemaphoreUnlock(IpcSemaphoreId semId, int sem)
442
{
443 444
	int			errStatus;
	struct sembuf sops;
445

446
	sops.sem_op = 1;			/* increment */
447 448 449 450
	sops.sem_flg = 0;
	sops.sem_num = sem;


451 452 453 454 455
	/*
	 * Note: if errStatus is -1 and errno == EINTR then it means we
	 * returned from the operation prematurely because we were sent a
	 * signal.	So we try and unlock the semaphore again. Not clear this
	 * can really happen, but might as well cope.
456 457 458 459 460 461 462 463
	 */
	do
	{
		errStatus = semop(semId, &sops, 1);
	} while (errStatus == -1 && errno == EINTR);

	if (errStatus == -1)
	{
464 465
		fprintf(stderr, "IpcSemaphoreUnlock: semop(id=%d) failed: %s\n",
				semId, strerror(errno));
466
		proc_exit(255);
467
	}
468 469
}

470 471 472 473 474 475 476 477 478 479 480 481 482 483
/****************************************************************************/
/*	 IpcSemaphoreTryLock(semId, sem)	- conditionally locks a semaphore	*/
/* Lock the semaphore if it's free, but don't block.						*/
/* Returns true if the lock was taken, false if it was not available.		*/
/* Any other semop() failure is reported, followed by proc_exit(255).		*/
/****************************************************************************/
bool
IpcSemaphoreTryLock(IpcSemaphoreId semId, int sem)
{
	int			errStatus;
	struct sembuf sops;

	sops.sem_op = -1;			/* decrement */
	sops.sem_flg = IPC_NOWAIT;	/* but don't block */
	sops.sem_num = sem;

	/*
	 * Note: if errStatus is -1 and errno == EINTR then it means we
	 * returned from the operation prematurely because we were sent a
	 * signal.	So we try and lock the semaphore again.
	 */
	do
	{
		errStatus = semop(semId, &sops, 1);
	} while (errStatus == -1 && errno == EINTR);

	if (errStatus == -1)
	{
		/* Expect EAGAIN or EWOULDBLOCK (platform-dependent) */
#ifdef EAGAIN
		if (errno == EAGAIN)
			return false;		/* failed to lock it */
#endif
#if defined(EWOULDBLOCK) && (!defined(EAGAIN) || (EWOULDBLOCK != EAGAIN))
		if (errno == EWOULDBLOCK)
			return false;		/* failed to lock it */
#endif
		/* Otherwise we got trouble */
		fprintf(stderr, "IpcSemaphoreTryLock: semop(id=%d) failed: %s\n",
				semId, strerror(errno));
		proc_exit(255);
	}

	return true;
}

/* Get the current value (semval) of the semaphore */
515
int
516
IpcSemaphoreGetValue(IpcSemaphoreId semId, int sem)
517
{
518
	union semun dummy;			/* for Solaris */
B
Bruce Momjian 已提交
519 520

	dummy.val = 0;				/* unused */
521

522
	return semctl(semId, sem, GETVAL, dummy);
523 524
}

525 526 527
/*
 * Get the PID of the last process to do semop() on the semaphore
 *
 * The result of semctl(GETPID) is returned unchanged, so a failure of that
 * call is passed straight through to the caller.
 */
static pid_t
IpcSemaphoreGetLastPID(IpcSemaphoreId semId, int sem)
{
	union semun dummy;			/* for Solaris */

	dummy.val = 0;				/* unused */

	return semctl(semId, sem, GETPID, dummy);
}


537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559
/* ----------------------------------------------------------------
 *						Shared memory support
 *
 * These routines represent a fairly thin layer on top of SysV shared
 * memory functionality.
 * ----------------------------------------------------------------
 */

/* ----------------------------------------------------------------
 *	InternalIpcMemoryCreate(memKey, size, permission)
 *
 * Attempt to create a new shared memory segment with the specified key.
 * Will fail (return NULL) if such a segment already exists.  If successful,
 * attach the segment to the current process and return its attached address.
 * On success, callbacks are registered with on_shmem_exit to detach and
 * delete the segment when shmem_exit is called, and the key and ID are
 * recorded via RecordSharedMemoryInLockFile.
 *
 * If we fail with a failure code other than collision-with-existing-segment,
 * print out an error and abort.  Other types of errors are not recoverable.
 * ----------------------------------------------------------------
 */
static void *
InternalIpcMemoryCreate(IpcMemoryKey memKey, uint32 size, int permission)
{
	IpcMemoryId shmid;
	void	   *memAddress;

	shmid = shmget(memKey, size, IPC_CREAT | IPC_EXCL | permission);

	if (shmid < 0)
	{
		/*
		 * Capture errno right away: the fprintf() calls below are allowed
		 * to change it, and we still need to test it afterwards.
		 */
		int			save_errno = errno;

		/*
		 * Fail quietly if error indicates a collision with existing
		 * segment. One would expect EEXIST, given that we said IPC_EXCL,
		 * but perhaps we could get a permission violation instead?  Also,
		 * EIDRM might occur if an old seg is slated for destruction but
		 * not gone yet.
		 */
		if (save_errno == EEXIST || save_errno == EACCES
#ifdef EIDRM
			|| save_errno == EIDRM
#endif
			)
			return NULL;

		/*
		 * Else complain and abort
		 */
		fprintf(stderr, "IpcMemoryCreate: shmget(key=%d, size=%u, 0%o) failed: %s\n",
				(int) memKey, size, (IPC_CREAT | IPC_EXCL | permission),
				strerror(save_errno));

		if (save_errno == EINVAL)
			fprintf(stderr,
					"\nThis error can be caused by one of three things:\n\n"
					"1. The maximum size for shared memory segments on your system was\n"
					"   exceeded.  You need to raise the SHMMAX parameter in your kernel\n"
					"   to be at least %u bytes.\n\n"
					"2. The requested shared memory segment was too small for your system.\n"
					"   You need to lower the SHMMIN parameter in your kernel.\n\n"
					"3. The requested shared memory segment already exists but is of the\n"
					"   wrong size.  This can occur if some other application on your system\n"
					"   is also using shared memory.\n\n"
					"The PostgreSQL Administrator's Guide contains more information about\n"
					"shared memory configuration.\n\n",
					size);

		else if (save_errno == ENOSPC)
			fprintf(stderr,
					"\nThis error does *not* mean that you have run out of disk space.\n\n"
					"It occurs either if all available shared memory ids have been taken,\n"
					"in which case you need to raise the SHMMNI parameter in your kernel,\n"
					"or because the system's overall limit for shared memory has been\n"
					"reached.  The PostgreSQL Administrator's Guide contains more\n"
					"information about shared memory configuration.\n\n");

		proc_exit(1);
	}

	/* Register on-exit routine to delete the new segment */
	on_shmem_exit(IpcMemoryDelete, Int32GetDatum(shmid));

	/* OK, should be able to attach to the segment */
	memAddress = shmat(shmid, 0, 0);

	if (memAddress == (void *) -1)
	{
		fprintf(stderr, "IpcMemoryCreate: shmat(id=%d) failed: %s\n",
				shmid, strerror(errno));
		proc_exit(1);
	}

	/*
	 * Register on-exit routine to detach new segment before deleting.
	 * (shmem_exit runs callbacks last-registered-first, so this one runs
	 * ahead of IpcMemoryDelete.)
	 */
	on_shmem_exit(IpcMemoryDetach, PointerGetDatum(memAddress));

	/* Record key and ID in lockfile for data directory. */
	RecordSharedMemoryInLockFile(memKey, shmid);

	return memAddress;
}

/****************************************************************************/
639
/*	IpcMemoryDetach(status, shmaddr)	removes a shared memory segment		*/
640 641
/*										from process' address spaceq		*/
/*	(called as an on_shmem_exit callback, hence funny argument list)		*/
642
/****************************************************************************/
643
static void
644
IpcMemoryDetach(int status, Datum shmaddr)
645
{
646 647 648
	if (shmdt(DatumGetPointer(shmaddr)) < 0)
		fprintf(stderr, "IpcMemoryDetach: shmdt(%p) failed: %s\n",
				DatumGetPointer(shmaddr), strerror(errno));
B
Bruce Momjian 已提交
649 650 651

	/*
	 * We used to report a failure via elog(NOTICE), but that's pretty
652 653
	 * pointless considering any client has long since disconnected ...
	 */
654 655 656
}

/****************************************************************************/
657 658
/*	IpcMemoryDelete(status, shmId)		deletes a shared memory segment		*/
/*	(called as an on_shmem_exit callback, hence funny argument list)		*/
659
/****************************************************************************/
660 661
static void
IpcMemoryDelete(int status, Datum shmId)
662
{
663 664 665
	if (shmctl(DatumGetInt32(shmId), IPC_RMID, (struct shmid_ds *) NULL) < 0)
		fprintf(stderr, "IpcMemoryDelete: shmctl(%d, %d, 0) failed: %s\n",
				DatumGetInt32(shmId), IPC_RMID, strerror(errno));
B
Bruce Momjian 已提交
666 667 668

	/*
	 * We used to report a failure via elog(NOTICE), but that's pretty
669 670 671
	 * pointless considering any client has long since disconnected ...
	 */
}
672

T
Tom Lane 已提交
673 674 675 676 677 678
/****************************************************************************/
/*	SharedMemoryIsInUse(shmKey, shmId)	Is a shared memory segment in use?	*/
/*	Only shmId is examined here; shmKey is accepted but not consulted.		*/
/****************************************************************************/
bool
SharedMemoryIsInUse(IpcMemoryKey shmKey, IpcMemoryId shmId)
{
	struct shmid_ds shmStat;

	/*
	 * We detect whether a shared memory segment is in use by seeing
	 * whether it (a) exists and (b) has any processes attached to it.
	 *
	 * If we are unable to perform the stat operation for a reason other than
	 * nonexistence of the segment (most likely, because it doesn't belong
	 * to our userid), assume it is in use.
	 */
	if (shmctl(shmId, IPC_STAT, &shmStat) < 0)
	{
		/*
		 * EINVAL actually has multiple possible causes documented in the
		 * shmctl man page, but we assume it must mean the segment no
		 * longer exists.
		 */
		if (errno == EINVAL)
			return false;
		/* Else assume segment is in use */
		return true;
	}
	/* If it has attached processes, it's in use */
	if (shmStat.shm_nattch != 0)
		return true;
	return false;
}


709 710 711 712 713 714 715 716
/* ----------------------------------------------------------------
 *						private memory support
 *
 * Rather than allocating shmem segments with IPC_PRIVATE key, we
 * just malloc() the requested amount of space.  This code emulates
 * the needed shmem functions.
 * ----------------------------------------------------------------
 */
717

718 719 720 721 722 723 724
/*
 * PrivateMemoryCreate(size)
 *
 * malloc() a zero-filled block of the given size to stand in for a shmem
 * segment, and register an on_shmem_exit callback that frees it.
 * If malloc() fails, complain on stderr and proc_exit(1).
 */
static void *
PrivateMemoryCreate(uint32 size)
{
	void	   *memAddress;

	memAddress = malloc(size);
	if (!memAddress)
	{
		fprintf(stderr, "PrivateMemoryCreate: malloc(%u) failed\n", size);
		proc_exit(1);
	}
	MemSet(memAddress, 0, size);/* keep Purify quiet */

	/* Register on-exit routine to release storage */
	on_shmem_exit(PrivateMemoryDelete, PointerGetDatum(memAddress));

	return memAddress;
}

737 738
/*
 * PrivateMemoryDelete(status, memaddr)
 *
 * on_shmem_exit callback that free()s the block obtained by
 * PrivateMemoryCreate.  status is ignored.
 */
static void
PrivateMemoryDelete(int status, Datum memaddr)
{
	free(DatumGetPointer(memaddr));
}
742

743

744
/* ------------------
 *				Routines to assign keys for new IPC objects
 *
 * The idea here is to detect and re-use keys that may have been assigned
 * by a crashed postmaster or backend.
 * ------------------
 */
751

752 753
static IpcMemoryKey NextShmemSegID = 0;
static IpcSemaphoreKey NextSemaID = 0;
754

755 756 757 758 759 760 761 762 763 764
/*
 * Reset the key counters for shared memory segments and semaphore sets.
 *
 * Called at startup of the postmaster or a standalone backend, and again
 * at postmaster reset.  Both counters restart from port * 1000.
 */
void
IpcInitKeyAssignment(int port)
{
	const int	keyBase = port * 1000;

	NextSemaID = keyBase;
	NextShmemSegID = keyBase;
}
765

766 767 768 769 770 771
/*
 * Create a shared memory segment of the given size and initialize its
 * standard header.  Dead Postgres segments are recycled if found,
 * but we do not fail upon collision with non-Postgres shmem segments.
 */
PGShmemHeader *
772
IpcMemoryCreate(uint32 size, bool makePrivate, int permission)
773
{
B
Bruce Momjian 已提交
774
	void	   *memAddress;
775
	PGShmemHeader *hdr;
776

777 778
	/* Room for a header? */
	Assert(size > MAXALIGN(sizeof(PGShmemHeader)));
779

780
	/* Loop till we find a free IPC key */
B
Bruce Momjian 已提交
781
	for (NextShmemSegID++;; NextShmemSegID++)
782
	{
783 784 785
		IpcMemoryId shmid;

		/* Special case if creating a private segment --- just malloc() it */
786
		if (makePrivate)
787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809
		{
			memAddress = PrivateMemoryCreate(size);
			break;
		}

		/* Try to create new segment */
		memAddress = InternalIpcMemoryCreate(NextShmemSegID, size, permission);
		if (memAddress)
			break;				/* successful create and attach */

		/* See if it looks to be leftover from a dead Postgres process */
		shmid = shmget(NextShmemSegID, sizeof(PGShmemHeader), 0);
		if (shmid < 0)
			continue;			/* failed: must be some other app's */
		memAddress = shmat(shmid, 0, 0);
		if (memAddress == (void *) -1)
			continue;			/* failed: must be some other app's */
		hdr = (PGShmemHeader *) memAddress;
		if (hdr->magic != PGShmemMagic)
		{
			shmdt(memAddress);
			continue;			/* segment belongs to a non-Postgres app */
		}
B
Bruce Momjian 已提交
810

811 812 813 814 815 816 817 818 819 820 821 822 823
		/*
		 * If the creator PID is my own PID or does not belong to any
		 * extant process, it's safe to zap it.
		 */
		if (hdr->creatorPID != getpid())
		{
			if (kill(hdr->creatorPID, 0) == 0 ||
				errno != ESRCH)
			{
				shmdt(memAddress);
				continue;		/* segment belongs to a live process */
			}
		}
B
Bruce Momjian 已提交
824

825
		/*
B
Bruce Momjian 已提交
826 827 828 829 830
		 * The segment appears to be from a dead Postgres process, or from
		 * a previous cycle of life in this same process.  Zap it, if
		 * possible.  This probably shouldn't fail, but if it does, assume
		 * the segment belongs to someone else after all, and continue
		 * quietly.
831 832 833 834
		 */
		shmdt(memAddress);
		if (shmctl(shmid, IPC_RMID, (struct shmid_ds *) NULL) < 0)
			continue;
B
Bruce Momjian 已提交
835

836 837 838 839 840 841
		/*
		 * Now try again to create the segment.
		 */
		memAddress = InternalIpcMemoryCreate(NextShmemSegID, size, permission);
		if (memAddress)
			break;				/* successful create and attach */
B
Bruce Momjian 已提交
842

843 844
		/*
		 * Can only get here if some other process managed to create the
B
Bruce Momjian 已提交
845 846
		 * same shmem key before we did.  Let him have that one, loop
		 * around to try next key.
847
		 */
848
	}
B
Bruce Momjian 已提交
849

850 851 852 853 854 855 856 857 858
	/*
	 * OK, we created a new segment.  Mark it as created by this process.
	 * The order of assignments here is critical so that another Postgres
	 * process can't see the header as valid but belonging to an invalid
	 * PID!
	 */
	hdr = (PGShmemHeader *) memAddress;
	hdr->creatorPID = getpid();
	hdr->magic = PGShmemMagic;
B
Bruce Momjian 已提交
859

860 861 862 863 864
	/*
	 * Initialize space allocation status for segment.
	 */
	hdr->totalsize = size;
	hdr->freeoffset = MAXALIGN(sizeof(PGShmemHeader));
865

866
	return hdr;
867 868
}

869 870 871 872 873 874 875 876 877
/*
 * Create a semaphore set with the given number of useful semaphores
 * (an additional sema is actually allocated to serve as identifier).
 * Dead Postgres sema sets are recycled if found, but we do not fail
 * upon collision with non-Postgres sema sets.
 */
IpcSemaphoreId
IpcSemaphoreCreate(int numSems, int permission,
				   int semStartValue, bool removeOnExit)
878
{
B
Bruce Momjian 已提交
879
	IpcSemaphoreId semId;
880
	union semun semun;
881

882
	/* Loop till we find a free IPC key */
B
Bruce Momjian 已提交
883
	for (NextSemaID++;; NextSemaID++)
884
	{
B
Bruce Momjian 已提交
885
		pid_t		creatorPID;
886 887

		/* Try to create new semaphore set */
B
Bruce Momjian 已提交
888
		semId = InternalIpcSemaphoreCreate(NextSemaID, numSems + 1,
889 890 891 892
										   permission, semStartValue,
										   removeOnExit);
		if (semId >= 0)
			break;				/* successful create */
893

894
		/* See if it looks to be leftover from a dead Postgres process */
B
Bruce Momjian 已提交
895
		semId = semget(NextSemaID, numSems + 1, 0);
896 897 898 899
		if (semId < 0)
			continue;			/* failed: must be some other app's */
		if (IpcSemaphoreGetValue(semId, numSems) != PGSemaMagic)
			continue;			/* sema belongs to a non-Postgres app */
B
Bruce Momjian 已提交
900

901 902 903 904 905 906 907 908 909 910 911 912 913
		/*
		 * If the creator PID is my own PID or does not belong to any
		 * extant process, it's safe to zap it.
		 */
		creatorPID = IpcSemaphoreGetLastPID(semId, numSems);
		if (creatorPID <= 0)
			continue;			/* oops, GETPID failed */
		if (creatorPID != getpid())
		{
			if (kill(creatorPID, 0) == 0 ||
				errno != ESRCH)
				continue;		/* sema belongs to a live process */
		}
B
Bruce Momjian 已提交
914

915 916
		/*
		 * The sema set appears to be from a dead Postgres process, or
B
Bruce Momjian 已提交
917 918 919 920
		 * from a previous cycle of life in this same process.	Zap it, if
		 * possible.  This probably shouldn't fail, but if it does, assume
		 * the sema set belongs to someone else after all, and continue
		 * quietly.
921 922 923 924
		 */
		semun.val = 0;			/* unused, but keep compiler quiet */
		if (semctl(semId, 0, IPC_RMID, semun) < 0)
			continue;
B
Bruce Momjian 已提交
925

926 927 928
		/*
		 * Now try again to create the sema set.
		 */
B
Bruce Momjian 已提交
929
		semId = InternalIpcSemaphoreCreate(NextSemaID, numSems + 1,
930 931 932 933
										   permission, semStartValue,
										   removeOnExit);
		if (semId >= 0)
			break;				/* successful create */
B
Bruce Momjian 已提交
934

935 936
		/*
		 * Can only get here if some other process managed to create the
B
Bruce Momjian 已提交
937 938
		 * same sema key before we did.  Let him have that one, loop
		 * around to try next key.
939 940
		 */
	}
B
Bruce Momjian 已提交
941

942 943 944
	/*
	 * OK, we created a new sema set.  Mark it as created by this process.
	 * We do this by setting the spare semaphore to PGSemaMagic-1 and then
B
Bruce Momjian 已提交
945 946
	 * incrementing it with semop().  That leaves it with value
	 * PGSemaMagic and sempid referencing this process.
947
	 */
B
Bruce Momjian 已提交
948
	semun.val = PGSemaMagic - 1;
949 950 951
	if (semctl(semId, numSems, SETVAL, semun) < 0)
	{
		fprintf(stderr, "IpcSemaphoreCreate: semctl(id=%d, %d, SETVAL, %d) failed: %s\n",
B
Bruce Momjian 已提交
952
				semId, numSems, PGSemaMagic - 1, strerror(errno));
953 954 955 956

		if (errno == ERANGE)
			fprintf(stderr,
					"You possibly need to raise your kernel's SEMVMX value to be at least\n"
B
Bruce Momjian 已提交
957
			"%d.  Look into the PostgreSQL documentation for details.\n",
958 959 960 961 962 963 964 965
					PGSemaMagic);

		proc_exit(1);
	}
	IpcSemaphoreUnlock(semId, numSems);

	return semId;
}