postmaster.c 127.8 KB
Newer Older
1 2
/*-------------------------------------------------------------------------
 *
3
 * postmaster.c
4 5 6 7
 *	  This program acts as a clearing house for requests to the
 *	  POSTGRES system.	Frontend programs send a startup message
 *	  to the Postmaster and the postmaster uses the info in the
 *	  message to setup a backend process.
8
 *
T
Tom Lane 已提交
9
 *	  The postmaster also manages system-wide operations such as
B
Bruce Momjian 已提交
10
 *	  startup and shutdown. The postmaster itself doesn't do those
11 12 13
 *	  operations, mind you --- it just forks off a subprocess to do them
 *	  at the right times.  It also takes care of resetting the system
 *	  if a backend crashes.
T
Tom Lane 已提交
14 15 16
 *
 *	  The postmaster process creates the shared memory and semaphore
 *	  pools during startup, but as a rule does not touch them itself.
J
Jan Wieck 已提交
17
 *	  In particular, it is not a member of the PGPROC array of backends
B
Bruce Momjian 已提交
18
 *	  and so it cannot participate in lock-manager operations.	Keeping
T
Tom Lane 已提交
19 20 21 22 23 24
 *	  the postmaster away from shared memory operations makes it simpler
 *	  and more reliable.  The postmaster is almost always able to recover
 *	  from crashes of individual backends by resetting shared memory;
 *	  if it did much with shared memory then it would be prone to crashing
 *	  along with the backends.
 *
25 26 27 28 29 30 31 32 33
 *	  When a request message is received, we now fork() immediately.
 *	  The child process performs authentication of the request, and
 *	  then becomes a backend if successful.  This allows the auth code
 *	  to be written in a simple single-threaded style (as opposed to the
 *	  crufty "poor man's multitasking" code that used to be needed).
 *	  More importantly, it ensures that blockages in non-multithreaded
 *	  libraries like SSL or PAM cannot cause denial of service to other
 *	  clients.
 *
T
Tom Lane 已提交
34
 *
B
Bruce Momjian 已提交
35
 * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
B
Add:  
Bruce Momjian 已提交
36
 * Portions Copyright (c) 1994, Regents of the University of California
37 38 39
 *
 *
 * IDENTIFICATION
40
 *	  $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.596 2009/09/08 17:08:36 tgl Exp $
41 42 43 44
 *
 * NOTES
 *
 * Initialization:
45 46
 *		The Postmaster sets up shared memory data structures
 *		for the backends.
47 48
 *
 * Synchronization:
T
Tom Lane 已提交
49 50 51 52
 *		The Postmaster shares memory with the backends but should avoid
 *		touching shared memory, so as not to become stuck if a crashing
 *		backend screws up locks or shared memory.  Likewise, the Postmaster
 *		should never block on messages from frontend clients.
53
 *
54
 * Garbage Collection:
55 56
 *		The Postmaster cleans up after backends if they have an emergency
 *		exit and/or core dump.
57
 *
58 59 60
 * Error Reporting:
 *		Use write_stderr() only for reporting "interactive" errors
 *		(essentially, bogus arguments on the command line).  Once the
B
Bruce Momjian 已提交
61
 *		postmaster is launched, use ereport().	In particular, don't use
62 63
 *		write_stderr() for anything that occurs after pmdaemonize.
 *
64 65
 *-------------------------------------------------------------------------
 */
66

67 68
#include "postgres.h"

69 70
#include <unistd.h>
#include <signal.h>
71
#include <time.h>
72 73 74 75 76
#include <sys/wait.h>
#include <ctype.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <fcntl.h>
B
Bruce Momjian 已提交
77
#include <sys/param.h>
78
#include <netinet/in.h>
79
#include <arpa/inet.h>
B
Bruce Momjian 已提交
80
#include <netdb.h>
81
#include <limits.h>
82

83
#ifdef HAVE_SYS_SELECT_H
84 85
#include <sys/select.h>
#endif
86

87
#ifdef HAVE_GETOPT_H
88
#include <getopt.h>
89 90
#endif

91
#ifdef USE_BONJOUR
92
#include <dns_sd.h>
93 94
#endif

95
#include "access/transam.h"
96
#include "access/xlog.h"
97
#include "bootstrap/bootstrap.h"
98
#include "catalog/pg_control.h"
B
Bruce Momjian 已提交
99
#include "lib/dllist.h"
100
#include "libpq/auth.h"
101
#include "libpq/ip.h"
B
Bruce Momjian 已提交
102
#include "libpq/libpq.h"
103
#include "libpq/pqsignal.h"
104
#include "miscadmin.h"
105
#include "pgstat.h"
106
#include "postmaster/autovacuum.h"
107
#include "postmaster/fork_process.h"
108
#include "postmaster/pgarch.h"
109
#include "postmaster/postmaster.h"
110
#include "postmaster/syslogger.h"
111
#include "storage/fd.h"
B
Bruce Momjian 已提交
112
#include "storage/ipc.h"
113
#include "storage/pg_shmem.h"
114
#include "storage/pmsignal.h"
B
Bruce Momjian 已提交
115 116
#include "storage/proc.h"
#include "tcop/tcopprot.h"
117
#include "utils/builtins.h"
118
#include "utils/datetime.h"
119
#include "utils/memutils.h"
120
#include "utils/ps_status.h"
121

122 123 124 125
#ifdef EXEC_BACKEND
#include "storage/spin.h"
#endif

126

127
/*
128 129 130 131
 * List of active backends (or child processes anyway; we don't actually
 * know whether a given child has become a backend or is still in the
 * authorization phase).  This is used mainly to keep track of how many
 * children we have and send them appropriate signals when necessary.
132
 *
133
 * "Special" children such as the startup, bgwriter and autovacuum launcher
B
Bruce Momjian 已提交
134
 * tasks are not in this list.	Autovacuum worker processes are in it.
135 136
 * Also, "dead_end" children are in it: these are children launched just
 * for the purpose of sending a friendly rejection message to a would-be
B
Bruce Momjian 已提交
137
 * client.	We must track them because they are attached to shared memory,
138 139
 * but we know they will never become live backends.  dead_end children are
 * not assigned a PMChildSlot.
140
 */
141 142
typedef struct bkend
{
143
	pid_t		pid;			/* process id of backend */
144
	long		cancel_key;		/* cancel key for cancels for this backend */
145
	int			child_slot;		/* PMChildSlot for this backend, if any */
146
	bool		is_autovacuum;	/* is it an autovacuum process? */
147
	bool		dead_end;		/* is it going to send an error and quit? */
T
Tom Lane 已提交
148
	Dlelem		elem;			/* list link in BackendList */
B
Bruce Momjian 已提交
149
} Backend;
150

151
static Dllist *BackendList;
152

153 154 155 156
#ifdef EXEC_BACKEND
static Backend *ShmemBackendArray;
#endif

157
/* The port number, socket directory and address list we listen on */
int			PostPortNumber;
char	   *UnixSocketDir;
char	   *ListenAddresses;
161

162 163 164
/*
 * ReservedBackends is the number of backends reserved for superuser use.
 * This number is taken out of the pool size given by MaxBackends, so the
 * number of backend slots available to non-superusers is
 * (MaxBackends - ReservedBackends).  Note what this really means is
 * "if there are <= ReservedBackends connections available, only superusers
 * can make new connections" --- pre-existing superuser connections don't
 * count against the limit.
 */
int			ReservedBackends;
172

173
/*
 * The socket(s) we're listening to.  MAXLISTEN is the capacity of the
 * array.
 */
#define MAXLISTEN	64
static int	ListenSocket[MAXLISTEN];
176 177 178 179

/*
 * Set by the -o option
 */
180
static char ExtraOptions[MAXPGPATH];
181 182 183

/*
 * These globals control the behavior of the postmaster in case some
 * backend dumps core.  Normally, it kills all peers of the dead backend
 * and reinitializes shared memory.  By specifying -T or -n, we can have
 * the postmaster stop (rather than kill) peers and not reinitialize
 * shared data structures.  (Reinit is currently dead code, though.)
 */
static bool Reinit = true;		/* cleared by -n */
static bool SendStop = false;	/* set by -T */
191

192
/* still more option variables */
bool		EnableSSL = false;	/* initialize the SSL library at startup? */
bool		SilentMode = false; /* silent_mode */

int			PreAuthDelay = 0;
int			AuthenticationTimeout = 60;

bool		log_hostname;		/* for ps display and logging */
bool		Log_connections = false;
bool		Db_user_namespace = false;

bool		enable_bonjour = false;
char	   *bonjour_name;		/* name passed to DNSServiceRegister() */
205

206
/* PIDs of special child processes; 0 when not running */
static pid_t StartupPID = 0;
static pid_t BgWriterPID = 0;
static pid_t WalWriterPID = 0;
static pid_t AutoVacPID = 0;
static pid_t PgArchPID = 0;
static pid_t PgStatPID = 0;
static pid_t SysLoggerPID = 0;
214

215
/* Startup/shutdown state */
#define			NoShutdown		0
#define			SmartShutdown	1
#define			FastShutdown	2

/* Holds one of the shutdown-mode values defined just above */
static int	Shutdown = NoShutdown;
221

B
Bruce Momjian 已提交
222
static bool FatalError = false; /* T if recovering from backend crash */
static bool RecoveryError = false;		/* T if WAL recovery failed */
224

225 226 227 228
/*
 * We use a simple state machine to control startup, shutdown, and
 * crash recovery (which is rather like shutdown followed by startup).
 *
229 230
 * After doing all the postmaster initialization work, we enter PM_STARTUP
 * state and the startup process is launched. The startup process begins by
231 232 233 234 235
 * reading the control file and other preliminary initialization steps.
 * In a normal startup, or after crash recovery, the startup process exits
 * with exit code 0 and we switch to PM_RUN state.  However, archive recovery
 * is handled specially since it takes much longer and we would like to support
 * hot standby during archive recovery.
236
 *
237 238 239
 * When the startup process is ready to start archive recovery, it signals the
 * postmaster, and we switch to PM_RECOVERY state. The background writer is
 * launched, while the startup process continues applying WAL.
240
 * After reaching a consistent point in WAL redo, startup process signals
241
 * us again, and we switch to PM_RECOVERY_CONSISTENT state. There's currently
242 243 244
 * no difference between PM_RECOVERY and PM_RECOVERY_CONSISTENT, but we
 * could start accepting connections to perform read-only queries at this
 * point, if we had the infrastructure to do that.
245 246
 * When archive recovery is finished, the startup process exits with exit
 * code 0 and we switch to PM_RUN state.
247
 *
248
 * Normal child backends can only be launched when we are in PM_RUN state.
249
 * (We also allow it in PM_WAIT_BACKUP state, but only for superusers.)
250 251 252 253 254 255 256 257 258 259 260 261
 * In other states we handle connection requests by launching "dead_end"
 * child processes, which will simply send the client an error message and
 * quit.  (We track these in the BackendList so that we can know when they
 * are all gone; this is important because they're still connected to shared
 * memory, and would interfere with an attempt to destroy the shmem segment,
 * possibly leading to SHMALL failure when we try to make a new one.)
 * In PM_WAIT_DEAD_END state we are waiting for all the dead_end children
 * to drain out of the system, and therefore stop accepting connection
 * requests at all until the last existing child has quit (which hopefully
 * will not be very long).
 *
 * Notice that this state variable does not distinguish *why* we entered
262
 * states later than PM_RUN --- Shutdown and FatalError must be consulted
263 264 265 266 267 268
 * to find that out.  FatalError is never true in PM_RECOVERY_* or PM_RUN
 * states, nor in PM_SHUTDOWN states (because we don't enter those states
 * when trying to recover from a crash).  It can be true in PM_STARTUP state,
 * because we don't clear it until we've successfully started WAL redo.
 * Similarly, RecoveryError means that we have crashed during recovery, and
 * should not try to restart.
269
 */
B
Bruce Momjian 已提交
270 271
typedef enum
{
	PM_INIT,					/* postmaster starting */
	PM_STARTUP,					/* waiting for startup subprocess */
	PM_RECOVERY,				/* in archive recovery mode */
	PM_RECOVERY_CONSISTENT,		/* consistent recovery mode */
	PM_RUN,						/* normal "database is alive" state */
	PM_WAIT_BACKUP,				/* waiting for online backup mode to end */
	PM_WAIT_BACKENDS,			/* waiting for live backends to exit */
	PM_SHUTDOWN,				/* waiting for bgwriter to do shutdown ckpt */
	PM_SHUTDOWN_2,				/* waiting for archiver to finish */
	PM_WAIT_DEAD_END,			/* waiting for dead_end children to exit */
	PM_NO_CHILDREN				/* all important children have exited */
} PMState;

/* Current state of the postmaster state machine */
static PMState pmState = PM_INIT;

B
Bruce Momjian 已提交
287 288
bool		ClientAuthInProgress = false;		/* T during new-client
												 * authentication */

bool		redirection_done = false;	/* stderr redirected for syslogger? */

/* received START_AUTOVAC_LAUNCHER signal */
static volatile sig_atomic_t start_autovac_launcher = false;

/* the launcher needs to be signalled to communicate some condition */
static volatile bool avlauncher_needs_signal = false;
296

297 298 299 300 301
/*
 * State for assigning random salts and cancel keys.
 * Also, the global MyCancelKey passes the cancel key assigned to a given
 * backend from the postmaster to that backend (via fork).
 */
static unsigned int random_seed = 0;
static struct timeval random_start_time;
304 305 306 307

/* getopt(3) state; declared here in case the system headers don't */
extern char *optarg;
extern int	optind,
			opterr;

#ifdef HAVE_INT_OPTRESET
extern int	optreset;			/* might not be declared by system headers */
#endif
312

313 314 315 316
#ifdef USE_BONJOUR
static DNSServiceRef bonjour_sdref = NULL;
#endif

317
/*
318 319
 * postmaster.c - function prototypes
 */
320
static void getInstallationPaths(const char *argv0);
321
static void checkDataDir(void);
322
static void pmdaemonize(void);
323 324
static Port *ConnCreate(int serverFd);
static void ConnFree(Port *port);
325
static void reset_shared(int port);
326 327 328
static void SIGHUP_handler(SIGNAL_ARGS);
static void pmdie(SIGNAL_ARGS);
static void reaper(SIGNAL_ARGS);
329
static void sigusr1_handler(SIGNAL_ARGS);
330
static void startup_die(SIGNAL_ARGS);
331
static void dummy_handler(SIGNAL_ARGS);
332 333
static void CleanupBackend(int pid, int exitstatus);
static void HandleChildCrash(int pid, int exitstatus, const char *procname);
334
static void LogChildExit(int lev, const char *procname,
B
Bruce Momjian 已提交
335
			 int pid, int exitstatus);
336
static void PostmasterStateMachine(void);
337
static void BackendInitialize(Port *port);
B
Bruce Momjian 已提交
338
static int	BackendRun(Port *port);
339
static void ExitPostmaster(int status);
340 341
static int	ServerLoop(void);
static int	BackendStartup(Port *port);
342
static int	ProcessStartupPacket(Port *port, bool SSLdone);
343
static void processCancelRequest(Port *port, void *pkt);
344
static int	initMasks(fd_set *rmask);
345
static void report_fork_failure_to_client(Port *port, int errnum);
346
static enum CAC_state canAcceptConnections(void);
347
static long PostmasterRandom(void);
348
static void RandomSalt(char *md5Salt);
349
static void signal_child(pid_t pid, int signal);
350
static void SignalSomeChildren(int signal, bool only_autovac);
B
Bruce Momjian 已提交
351

352 353
#define SignalChildren(sig)			SignalSomeChildren(sig, false)
#define SignalAutovacWorkers(sig)	SignalSomeChildren(sig, true)
354
static int	CountChildren(void);
355
static bool CreateOptsFile(int argc, char *argv[], char *fullprogname);
356
static pid_t StartChildProcess(AuxProcType type);
357
static void StartAutovacuumWorker(void);
358

359
#ifdef EXEC_BACKEND
360

361
#ifdef WIN32
static pid_t win32_waitpid(int *exitstatus);
static void WINAPI pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired);

/* I/O completion port used to deliver the list of dead children */
static HANDLE win32ChildQueue;

/*
 * Wait/process handles and process id for one child.
 * NOTE(review): presumably the lpParameter handed to
 * pgwin32_deadchild_callback() --- confirm against its definition.
 */
typedef struct
{
	HANDLE		waitHandle;
	HANDLE		procHandle;
	DWORD		procId;
} win32_deadchild_waitinfo;

/*
 * Duplicated handle of the postmaster process; child processes can use it
 * to check whether the postmaster is still running.
 */
HANDLE		PostmasterHandle;
#endif

377 378
static pid_t backend_forkexec(Port *port);
static pid_t internal_forkexec(int argc, char *argv[], Port *port);
379

380 381 382 383
/* Type for a socket that can be inherited to a client process */
#ifdef WIN32
typedef struct
{
	SOCKET		origsocket;		/* Original socket value, or -1 if not a
								 * socket */
	WSAPROTOCOL_INFO wsainfo;
} InheritableSocket;
#else
typedef int InheritableSocket;
#endif

typedef struct LWLock LWLock;	/* ugly kluge */

/*
 * Structure contains all variables passed to exec:ed backends
 */
typedef struct
{
B
Bruce Momjian 已提交
399
	Port		port;
400
	InheritableSocket portsocket;
B
Bruce Momjian 已提交
401 402 403
	char		DataDir[MAXPGPATH];
	int			ListenSocket[MAXLISTEN];
	long		MyCancelKey;
404
	int			MyPMChildSlot;
405
	unsigned long UsedShmemSegID;
B
Bruce Momjian 已提交
406 407
	void	   *UsedShmemSegAddr;
	slock_t    *ShmemLock;
408
	VariableCache ShmemVariableCache;
B
Bruce Momjian 已提交
409 410 411
	Backend    *ShmemBackendArray;
	LWLock	   *LWLockArray;
	slock_t    *ProcStructLock;
412
	PROC_HDR   *ProcGlobal;
413
	PGPROC	   *AuxiliaryProcs;
414
	PMSignalData *PMSignalState;
415
	InheritableSocket pgStatSock;
B
Bruce Momjian 已提交
416
	pid_t		PostmasterPid;
417
	TimestampTz PgStartTime;
418
	TimestampTz PgReloadTime;
B
Bruce Momjian 已提交
419
	bool		redirection_done;
420
#ifdef WIN32
B
Bruce Momjian 已提交
421 422 423
	HANDLE		PostmasterHandle;
	HANDLE		initial_signal_pipe;
	HANDLE		syslogPipe[2];
424
#else
B
Bruce Momjian 已提交
425
	int			syslogPipe[2];
426
#endif
B
Bruce Momjian 已提交
427 428 429
	char		my_exec_path[MAXPGPATH];
	char		pkglib_path[MAXPGPATH];
	char		ExtraOptions[MAXPGPATH];
430
} BackendParameters;
431 432

static void read_backend_variables(char *id, Port *port);
433
static void restore_backend_variables(BackendParameters *param, Port *port);
B
Bruce Momjian 已提交
434

435
#ifndef WIN32
436
static bool save_backend_variables(BackendParameters *param, Port *port);
437
#else
438
static bool save_backend_variables(BackendParameters *param, Port *port,
B
Bruce Momjian 已提交
439
					   HANDLE childProcess, pid_t childPid);
440
#endif
441

B
Bruce Momjian 已提交
442
static void ShmemBackendArrayAdd(Backend *bn);
443
static void ShmemBackendArrayRemove(Backend *bn);
B
Bruce Momjian 已提交
444
#endif   /* EXEC_BACKEND */
445

446 447
/* Shorthands for launching specific child types via StartChildProcess() */
#define StartupDataBase()		StartChildProcess(StartupProcess)
#define StartBackgroundWriter() StartChildProcess(BgWriterProcess)
#define StartWalWriter()		StartChildProcess(WalWriterProcess)
449

450 451 452 453
/*
 * Classify a child's wait status:
 *	EXIT_STATUS_0 - the raw status word is zero
 *	EXIT_STATUS_1 - the child exited normally with exit code 1
 */
#define EXIT_STATUS_0(st)  (0 == (st))
#define EXIT_STATUS_1(st)  (WIFEXITED(st) ? (WEXITSTATUS(st) == 1) : 0)

454

455 456 457
/*
 * Postmaster main entry point
 */
458 459 460
int
PostmasterMain(int argc, char *argv[])
{
B
Bruce Momjian 已提交
461 462
	int			opt;
	int			status;
463
	char	   *userDoption = NULL;
B
Bruce Momjian 已提交
464
	int			i;
465

466 467
	MyProcPid = PostmasterPid = getpid();

468 469
	MyStartTime = time(NULL);

470 471
	IsPostmasterEnvironment = true;

472
	/*
B
Bruce Momjian 已提交
473
	 * for security, no dir or file created can be group or other accessible
474 475 476
	 */
	umask((mode_t) 0077);

477
	/*
478
	 * Fire up essential subsystems: memory management
479 480 481 482
	 */
	MemoryContextInit();

	/*
B
Bruce Momjian 已提交
483 484 485 486
	 * By default, palloc() requests in the postmaster will be allocated in
	 * the PostmasterContext, which is space that can be recycled by backends.
	 * Allocated data that needs to be available to backends should be
	 * allocated in TopMemoryContext.
487 488 489 490 491 492 493 494
	 */
	PostmasterContext = AllocSetContextCreate(TopMemoryContext,
											  "Postmaster",
											  ALLOCSET_DEFAULT_MINSIZE,
											  ALLOCSET_DEFAULT_INITSIZE,
											  ALLOCSET_DEFAULT_MAXSIZE);
	MemoryContextSwitchTo(PostmasterContext);

495 496
	/* Initialize paths to installation files */
	getInstallationPaths(argv[0]);
497

498 499 500
	/*
	 * Options setup
	 */
501
	InitializeGUCOptions();
502

503
	opterr = 1;
504

505
	/*
B
Bruce Momjian 已提交
506 507 508
	 * Parse command-line options.	CAUTION: keep this in sync with
	 * tcop/postgres.c (the option sets should not conflict) and with the
	 * common help() function in main/main.c.
509
	 */
510
	while ((opt = getopt(argc, argv, "A:B:c:D:d:EeFf:h:ijk:lN:nOo:Pp:r:S:sTt:W:-:")) != -1)
511 512 513
	{
		switch (opt)
		{
M
 
Marc G. Fournier 已提交
514
			case 'A':
515
				SetConfigOption("debug_assertions", optarg, PGC_POSTMASTER, PGC_S_ARGV);
516
				break;
517

518
			case 'B':
519
				SetConfigOption("shared_buffers", optarg, PGC_POSTMASTER, PGC_S_ARGV);
520
				break;
521

522
			case 'D':
523
				userDoption = optarg;
524
				break;
525

526
			case 'd':
527 528
				set_debug_options(atoi(optarg), PGC_POSTMASTER, PGC_S_ARGV);
				break;
529 530 531 532 533 534 535 536 537

			case 'E':
				SetConfigOption("log_statement", "all", PGC_POSTMASTER, PGC_S_ARGV);
				break;

			case 'e':
				SetConfigOption("datestyle", "euro", PGC_POSTMASTER, PGC_S_ARGV);
				break;

538
			case 'F':
539
				SetConfigOption("fsync", "false", PGC_POSTMASTER, PGC_S_ARGV);
540
				break;
541 542 543 544 545 546 547 548 549 550

			case 'f':
				if (!set_plan_disabling_options(optarg, PGC_POSTMASTER, PGC_S_ARGV))
				{
					write_stderr("%s: invalid argument for option -f: \"%s\"\n",
								 progname, optarg);
					ExitPostmaster(1);
				}
				break;

551
			case 'h':
552
				SetConfigOption("listen_addresses", optarg, PGC_POSTMASTER, PGC_S_ARGV);
553
				break;
554

B
Bruce Momjian 已提交
555
			case 'i':
556
				SetConfigOption("listen_addresses", "*", PGC_POSTMASTER, PGC_S_ARGV);
557
				break;
558 559 560 561 562

			case 'j':
				/* only used by interactive backend */
				break;

563
			case 'k':
564
				SetConfigOption("unix_socket_directory", optarg, PGC_POSTMASTER, PGC_S_ARGV);
565
				break;
566

567
			case 'l':
568
				SetConfigOption("ssl", "true", PGC_POSTMASTER, PGC_S_ARGV);
569
				break;
570

571
			case 'N':
572
				SetConfigOption("max_connections", optarg, PGC_POSTMASTER, PGC_S_ARGV);
573
				break;
574

575 576
			case 'n':
				/* Don't reinit shared mem after abnormal exit */
577
				Reinit = false;
578
				break;
579

580 581 582 583 584 585
			case 'O':
				SetConfigOption("allow_system_table_mods", "true", PGC_POSTMASTER, PGC_S_ARGV);
				break;

			case 'o':
				/* Other options to pass to the backend on the command line */
586 587 588
				snprintf(ExtraOptions + strlen(ExtraOptions),
						 sizeof(ExtraOptions) - strlen(ExtraOptions),
						 " %s", optarg);
589
				break;
590 591 592 593 594

			case 'P':
				SetConfigOption("ignore_system_indexes", "true", PGC_POSTMASTER, PGC_S_ARGV);
				break;

595
			case 'p':
596
				SetConfigOption("port", optarg, PGC_POSTMASTER, PGC_S_ARGV);
597
				break;
598

599 600 601 602 603 604
			case 'r':
				/* only used by single-user backend */
				break;

			case 'S':
				SetConfigOption("work_mem", optarg, PGC_POSTMASTER, PGC_S_ARGV);
605
				break;
606

607
			case 's':
608
				SetConfigOption("log_statement_stats", "true", PGC_POSTMASTER, PGC_S_ARGV);
609
				break;
610

611
			case 'T':
B
Bruce Momjian 已提交
612

613
				/*
B
Bruce Momjian 已提交
614 615 616
				 * In the event that some backend dumps core, send SIGSTOP,
				 * rather than SIGQUIT, to all its peers.  This lets the wily
				 * post_hacker collect core dumps from everyone.
617
				 */
618
				SendStop = true;
619
				break;
620 621 622

			case 't':
				{
B
Bruce Momjian 已提交
623 624 625 626 627 628 629 630 631 632 633 634 635
					const char *tmp = get_stats_option_name(optarg);

					if (tmp)
					{
						SetConfigOption(tmp, "true", PGC_POSTMASTER, PGC_S_ARGV);
					}
					else
					{
						write_stderr("%s: invalid argument for option -t: \"%s\"\n",
									 progname, optarg);
						ExitPostmaster(1);
					}
					break;
636 637 638 639 640 641
				}

			case 'W':
				SetConfigOption("post_auth_delay", optarg, PGC_POSTMASTER, PGC_S_ARGV);
				break;

642
			case 'c':
643
			case '-':
644
				{
B
Bruce Momjian 已提交
645 646 647 648 649 650 651
					char	   *name,
							   *value;

					ParseLongOption(optarg, &name, &value);
					if (!value)
					{
						if (opt == '-')
652 653 654 655
							ereport(ERROR,
									(errcode(ERRCODE_SYNTAX_ERROR),
									 errmsg("--%s requires a value",
											optarg)));
B
Bruce Momjian 已提交
656
						else
657 658 659 660
							ereport(ERROR,
									(errcode(ERRCODE_SYNTAX_ERROR),
									 errmsg("-c %s requires a value",
											optarg)));
B
Bruce Momjian 已提交
661 662
					}

663
					SetConfigOption(name, value, PGC_POSTMASTER, PGC_S_ARGV);
B
Bruce Momjian 已提交
664 665 666 667
					free(name);
					if (value)
						free(value);
					break;
668
				}
669

670
			default:
671 672
				write_stderr("Try \"%s --help\" for more information.\n",
							 progname);
673
				ExitPostmaster(1);
674 675
		}
	}
676

677 678 679 680 681
	/*
	 * Postmaster accepts no non-option switch arguments.
	 */
	if (optind < argc)
	{
682 683 684 685
		write_stderr("%s: invalid argument: \"%s\"\n",
					 progname, argv[optind]);
		write_stderr("Try \"%s --help\" for more information.\n",
					 progname);
686 687 688
		ExitPostmaster(1);
	}

689
	/*
B
Bruce Momjian 已提交
690 691
	 * Locate the proper configuration files and data directory, and read
	 * postgresql.conf for the first time.
692 693 694
	 */
	if (!SelectConfigFiles(userDoption, progname))
		ExitPostmaster(2);
B
Bruce Momjian 已提交
695

696 697
	/* Verify that DataDir looks reasonable */
	checkDataDir();
698

699 700 701
	/* And switch working directory into it */
	ChangeToDataDir();

702 703
	/*
	 * Check for invalid combinations of GUC settings.
704
	 */
705
	if (ReservedBackends >= MaxBackends)
706
	{
707
		write_stderr("%s: superuser_reserved_connections must be less than max_connections\n", progname);
708 709
		ExitPostmaster(1);
	}
710

711
	/*
712 713
	 * Other one-time internal sanity checks can go here, if they are fast.
	 * (Put any slow processing further down, after postmaster.pid creation.)
714 715 716
	 */
	if (!CheckDateTokenTables())
	{
717
		write_stderr("%s: invalid datetoken tables, please fix\n", progname);
718 719 720
		ExitPostmaster(1);
	}

721
	/*
722 723
	 * Now that we are done processing the postmaster arguments, reset
	 * getopt(3) library so that it will work correctly in subprocesses.
724 725 726
	 */
	optind = 1;
#ifdef HAVE_INT_OPTRESET
727
	optreset = 1;				/* some systems need this too */
728 729 730
#endif

	/* For debugging: display postmaster environment */
731 732 733 734
	{
		extern char **environ;
		char	  **p;

735
		ereport(DEBUG3,
B
Bruce Momjian 已提交
736 737
				(errmsg_internal("%s: PostmasterMain: initial environ dump:",
								 progname)));
738
		ereport(DEBUG3,
B
Bruce Momjian 已提交
739
			 (errmsg_internal("-----------------------------------------")));
740
		for (p = environ; *p; ++p)
741 742 743
			ereport(DEBUG3,
					(errmsg_internal("\t%s", *p)));
		ereport(DEBUG3,
B
Bruce Momjian 已提交
744
			 (errmsg_internal("-----------------------------------------")));
745 746
	}

747
	/*
748
	 * Fork away from controlling terminal, if silent_mode specified.
749
	 *
750 751
	 * Must do this before we grab any interlock files, else the interlocks
	 * will show the wrong PID.
752 753
	 */
	if (SilentMode)
754
		pmdaemonize();
755 756 757 758

	/*
	 * Create lockfile for data directory.
	 *
B
Bruce Momjian 已提交
759 760 761 762 763
	 * We want to do this before we try to grab the input sockets, because the
	 * data directory interlock is more reliable than the socket-file
	 * interlock (thanks to whoever decided to put socket files in /tmp :-().
	 * For the same reason, it's best to grab the TCP socket(s) before the
	 * Unix socket.
764
	 */
765
	CreateDataDirLockFile(true);
766

767
	/*
768
	 * If timezone is not set, determine what the OS uses.	(In theory this
769 770 771
	 * should be done during GUC initialization, but because it can take as
	 * much as several seconds, we delay it until after we've created the
	 * postmaster.pid file.  This prevents problems with boot scripts that
772 773
	 * expect the pidfile to appear quickly.  Also, we avoid problems with
	 * trying to locate the timezone files too early in initialization.)
774 775 776
	 */
	pg_timezone_initialize();

777 778 779 780 781
	/*
	 * Likewise, init timezone_abbreviations if not already set.
	 */
	pg_timezone_abbrev_initialize();

782 783 784 785 786 787 788 789 790
	/*
	 * Initialize SSL library, if specified.
	 */
#ifdef USE_SSL
	if (EnableSSL)
		secure_initialize();
#endif

	/*
791
	 * process any libraries that should be preloaded at postmaster start
792
	 */
793
	process_shared_preload_libraries();
794

795
	/*
796
	 * Remove old temporary files.	At this point there can be no other
B
Bruce Momjian 已提交
797
	 * Postgres processes running in this directory, so this should be safe.
798 799 800
	 */
	RemovePgTempFiles();

801 802 803
	/*
	 * Establish input sockets.
	 */
B
Bruce Momjian 已提交
804 805
	for (i = 0; i < MAXLISTEN; i++)
		ListenSocket[i] = -1;
806

807
	if (ListenAddresses)
B
Bruce Momjian 已提交
808
	{
B
Bruce Momjian 已提交
809 810 811
		char	   *rawstring;
		List	   *elemlist;
		ListCell   *l;
812
		int			success = 0;
813

814 815 816 817
		/* Need a modifiable copy of ListenAddresses */
		rawstring = pstrdup(ListenAddresses);

		/* Parse string into list of identifiers */
B
Bruce Momjian 已提交
818
		if (!SplitIdentifierString(rawstring, ',', &elemlist))
819
		{
820 821 822
			/* syntax error in list */
			ereport(FATAL,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
B
Bruce Momjian 已提交
823
					 errmsg("invalid list syntax for \"listen_addresses\"")));
824 825 826 827
		}

		foreach(l, elemlist)
		{
B
Bruce Momjian 已提交
828
			char	   *curhost = (char *) lfirst(l);
829

B
Bruce Momjian 已提交
830
			if (strcmp(curhost, "*") == 0)
831 832 833 834 835
				status = StreamServerPort(AF_UNSPEC, NULL,
										  (unsigned short) PostPortNumber,
										  UnixSocketDir,
										  ListenSocket, MAXLISTEN);
			else
836 837 838 839
				status = StreamServerPort(AF_UNSPEC, curhost,
										  (unsigned short) PostPortNumber,
										  UnixSocketDir,
										  ListenSocket, MAXLISTEN);
840 841 842
			if (status == STATUS_OK)
				success++;
			else
843
				ereport(WARNING,
B
Bruce Momjian 已提交
844 845
						(errmsg("could not create listen socket for \"%s\"",
								curhost)));
846
		}
847

848 849 850 851
		if (!success && list_length(elemlist))
			ereport(FATAL,
					(errmsg("could not create any TCP/IP sockets")));

852 853
		list_free(elemlist);
		pfree(rawstring);
854
	}
855

856 857
#ifdef USE_BONJOUR
	/* Register for Bonjour only if we opened TCP socket(s) */
858
	if (enable_bonjour && ListenSocket[0] != -1)
859
	{
860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889
		DNSServiceErrorType err;

		/*
		 * We pass 0 for interface_index, which will result in registering on
		 * all "applicable" interfaces.  It's not entirely clear from the
		 * DNS-SD docs whether this would be appropriate if we have bound to
		 * just a subset of the available network interfaces.
		 */
		err = DNSServiceRegister(&bonjour_sdref,
								 0,
								 0,
								 bonjour_name,
								 "_postgresql._tcp.",
								 NULL,
								 NULL,
								 htons(PostPortNumber),
								 0,
								 NULL,
								 NULL,
								 NULL);
		if (err != kDNSServiceErr_NoError)
			elog(LOG, "DNSServiceRegister() failed: error code %ld",
				 (long) err);
		/*
		 * We don't bother to read the mDNS daemon's reply, and we expect
		 * that it will automatically terminate our registration when the
		 * socket is closed at postmaster termination.  So there's nothing
		 * more to be done here.  However, the bonjour_sdref is kept around
		 * so that forked children can close their copies of the socket.
		 */
B
Bruce Momjian 已提交
890
	}
891
#endif
892

893
#ifdef HAVE_UNIX_SOCKETS
B
Bruce Momjian 已提交
894
	status = StreamServerPort(AF_UNIX, NULL,
895 896 897
							  (unsigned short) PostPortNumber,
							  UnixSocketDir,
							  ListenSocket, MAXLISTEN);
898
	if (status != STATUS_OK)
899
		ereport(WARNING,
900
				(errmsg("could not create Unix-domain socket")));
901
#endif
902

903 904 905 906 907
	/*
	 * check that we have some socket to listen on
	 */
	if (ListenSocket[0] == -1)
		ereport(FATAL,
908
				(errmsg("no socket created for listening")));
909

910 911 912
	/*
	 * Set up shared memory and semaphores.
	 */
B
Bruce Momjian 已提交
913
	reset_shared(PostPortNumber);
914

915
	/*
B
Bruce Momjian 已提交
916 917
	 * Estimate number of openable files.  This must happen after setting up
	 * semaphores, because on some platforms semaphores count as open files.
918 919 920
	 */
	set_max_safe_fds();

921
	/*
922
	 * Initialize the list of active backends.
923 924 925
	 */
	BackendList = DLNewList();

926
#ifdef WIN32
B
Bruce Momjian 已提交
927

928
	/*
929
	 * Initialize I/O completion port used to deliver list of dead children.
930
	 */
931 932
	win32ChildQueue = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 1);
	if (win32ChildQueue == NULL)
933
		ereport(FATAL,
B
Bruce Momjian 已提交
934
		   (errmsg("could not create I/O completion port for child queue")));
935 936 937 938 939 940 941 942 943 944 945 946 947

	/*
	 * Set up a handle that child processes can use to check whether the
	 * postmaster is still running.
	 */
	if (DuplicateHandle(GetCurrentProcess(),
						GetCurrentProcess(),
						GetCurrentProcess(),
						&PostmasterHandle,
						0,
						TRUE,
						DUPLICATE_SAME_ACCESS) == 0)
		ereport(FATAL,
B
Bruce Momjian 已提交
948 949
				(errmsg_internal("could not duplicate postmaster handle: error code %d",
								 (int) GetLastError())));
950 951
#endif

952
	/*
B
Bruce Momjian 已提交
953 954
	 * Record postmaster options.  We delay this till now to avoid recording
	 * bogus options (eg, NBuffers too high for available memory).
955
	 */
956
	if (!CreateOptsFile(argc, argv, my_exec_path))
957
		ExitPostmaster(1);
958

959
#ifdef EXEC_BACKEND
960
	/* Write out nondefault GUC settings for child processes to use */
961 962 963
	write_nondefault_variables(PGC_POSTMASTER);
#endif

964 965 966
	/*
	 * Write the external PID file if requested
	 */
967
	if (external_pid_file)
968
	{
969
		FILE	   *fpidfile = fopen(external_pid_file, "w");
970 971 972 973 974 975 976 977

		if (fpidfile)
		{
			fprintf(fpidfile, "%d\n", MyProcPid);
			fclose(fpidfile);
			/* Should we remove the pid file on postmaster exit? */
		}
		else
P
Peter Eisentraut 已提交
978
			write_stderr("%s: could not write external PID file \"%s\": %s\n",
979
						 progname, external_pid_file, strerror(errno));
980 981
	}

M
 
Marc G. Fournier 已提交
982 983
	/*
	 * Set up signal handlers for the postmaster process.
984
	 *
B
Bruce Momjian 已提交
985 986
	 * CAUTION: when changing this list, check for side-effects on the signal
	 * handling setup of child processes.  See tcop/postgres.c,
987 988 989
	 * bootstrap/bootstrap.c, postmaster/bgwriter.c, postmaster/walwriter.c,
	 * postmaster/autovacuum.c, postmaster/pgarch.c, postmaster/pgstat.c, and
	 * postmaster/syslogger.c.
M
 
Marc G. Fournier 已提交
990
	 */
991
	pqinitmask();
992 993
	PG_SETMASK(&BlockSig);

B
Bruce Momjian 已提交
994 995
	pqsignal(SIGHUP, SIGHUP_handler);	/* reread config file and have
										 * children do same */
996
	pqsignal(SIGINT, pmdie);	/* send SIGTERM and shut down */
T
Tom Lane 已提交
997
	pqsignal(SIGQUIT, pmdie);	/* send SIGQUIT and die */
998
	pqsignal(SIGTERM, pmdie);	/* wait for children and shut down */
999 1000
	pqsignal(SIGALRM, SIG_IGN); /* ignored */
	pqsignal(SIGPIPE, SIG_IGN); /* ignored */
1001
	pqsignal(SIGUSR1, sigusr1_handler); /* message from child process */
1002
	pqsignal(SIGUSR2, dummy_handler);	/* unused, reserve for children */
1003 1004 1005
	pqsignal(SIGCHLD, reaper);	/* handle child termination */
	pqsignal(SIGTTIN, SIG_IGN); /* ignored */
	pqsignal(SIGTTOU, SIG_IGN); /* ignored */
1006 1007 1008 1009
	/* ignore SIGXFSZ, so that ulimit violations work like disk full */
#ifdef SIGXFSZ
	pqsignal(SIGXFSZ, SIG_IGN); /* ignored */
#endif
1010

1011 1012 1013 1014 1015
	/*
	 * If enabled, start up syslogger collection subprocess
	 */
	SysLoggerPID = SysLogger_Start();

1016
	/*
1017 1018
	 * Reset whereToSendOutput from DestDebug (its starting state) to
	 * DestNone. This stops ereport from sending log messages to stderr unless
B
Bruce Momjian 已提交
1019 1020 1021
	 * Log_destination permits.  We don't do this until the postmaster is
	 * fully launched, since startup failures may as well be reported to
	 * stderr.
1022
	 */
1023
	whereToSendOutput = DestNone;
1024

1025
	/*
1026 1027
	 * Initialize stats collection subsystem (this does NOT start the
	 * collector process!)
1028
	 */
1029
	pgstat_init();
1030

1031
	/*
1032
	 * Initialize the autovacuum subsystem (again, no process start yet)
1033
	 */
1034
	autovac_init();
1035

1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049
	/*
	 * Load configuration files for client authentication.
	 */
	if (!load_hba())
	{
		/*
		 * It makes no sense to continue if we fail to load the HBA file,
		 * since there is no way to connect to the database in this case.
		 */
		ereport(FATAL,
				(errmsg("could not load pg_hba.conf")));
	}
	load_ident();

1050
	/*
1051
	 * Remember postmaster startup time
1052
	 */
1053
	PgStartTime = GetCurrentTimestamp();
1054 1055
	/* PostmasterRandom wants its own copy */
	gettimeofday(&random_start_time, NULL);
1056

1057
	/*
1058
	 * We're ready to rock and roll...
1059
	 */
1060
	StartupPID = StartupDataBase();
1061 1062
	Assert(StartupPID != 0);
	pmState = PM_STARTUP;
1063

1064 1065
	status = ServerLoop();

1066
	/*
B
Bruce Momjian 已提交
1067
	 * ServerLoop probably shouldn't ever return, but if it does, close down.
1068
	 */
1069
	ExitPostmaster(status != STATUS_OK);
1070

1071
	return 0;					/* not reached */
1072 1073
}

1074

1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103
/*
 * getInstallationPaths -- work out where the installation's files live.
 *
 * argv0 is the program name as invoked.  It is passed to find_my_exec() to
 * fill in my_exec_path; pkglib_path is then derived from my_exec_path.
 * The pkglib directory must be openable, otherwise an error is reported.
 */
static void
getInstallationPaths(const char *argv0)
{
	DIR		   *libdir;

	/* Start by finding the full path of the running executable */
	if (find_my_exec(argv0, my_exec_path) < 0)
		elog(FATAL, "%s: could not locate my own executable path", argv0);

#ifdef EXEC_BACKEND
	/* The backend executable has to be located before any chdir happens */
	if (find_other_exec(argv0, "postgres", PG_BACKEND_VERSIONSTR,
						postgres_exec_path) < 0)
		ereport(FATAL,
				(errmsg("%s: could not locate matching postgres executable",
						argv0)));
#endif

	/*
	 * Derive the pkglib directory now: it must be known early, because
	 * postgresql.conf entries may cause modules to be loaded from it.
	 */
	get_pkglib_path(my_exec_path, pkglib_path);

	/*
	 * The directory must exist and be readable.  If it is not, the
	 * installation is incomplete or damaged; a common cause is that the
	 * postgres executable was moved or hardlinked somewhere that is not a
	 * sibling of the installation's lib/ directory.
	 */
	libdir = AllocateDir(pkglib_path);
	if (!libdir)
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not open directory \"%s\": %m",
						pkglib_path),
				 errhint("This may indicate an incomplete PostgreSQL installation, or that the file \"%s\" has been moved away from its proper location.",
						 my_exec_path)));
	FreeDir(libdir);

	/*
	 * XXX share/ is not verified in the same way; if lib/ is present,
	 * share/ most likely is as well.
	 */
}


1127 1128 1129 1130
/*
 * Validate the proposed data directory
 */
static void
1131
checkDataDir(void)
1132 1133 1134 1135 1136
{
	char		path[MAXPGPATH];
	FILE	   *fp;
	struct stat stat_buf;

1137
	Assert(DataDir);
1138

1139
	if (stat(DataDir, &stat_buf) != 0)
1140 1141 1142 1143
	{
		if (errno == ENOENT)
			ereport(FATAL,
					(errcode_for_file_access(),
1144
					 errmsg("data directory \"%s\" does not exist",
1145
							DataDir)));
1146 1147 1148
		else
			ereport(FATAL,
					(errcode_for_file_access(),
B
Bruce Momjian 已提交
1149 1150
				 errmsg("could not read permissions of directory \"%s\": %m",
						DataDir)));
1151 1152
	}

1153 1154 1155 1156 1157 1158 1159
	/* eventual chdir would fail anyway, but let's test ... */
	if (!S_ISDIR(stat_buf.st_mode))
		ereport(FATAL,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("specified data directory \"%s\" is not a directory",
						DataDir)));

1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177
	/*
	 * Check that the directory belongs to my userid; if not, reject.
	 *
	 * This check is an essential part of the interlock that prevents two
	 * postmasters from starting in the same directory (see CreateLockFile()).
	 * Do not remove or weaken it.
	 *
	 * XXX can we safely enable this check on Windows?
	 */
#if !defined(WIN32) && !defined(__CYGWIN__)
	if (stat_buf.st_uid != geteuid())
		ereport(FATAL,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("data directory \"%s\" has wrong ownership",
						DataDir),
				 errhint("The server must be started by the user that owns the data directory.")));
#endif

1178 1179 1180
	/*
	 * Check if the directory has group or world access.  If so, reject.
	 *
1181 1182 1183 1184 1185
	 * It would be possible to allow weaker constraints (for example, allow
	 * group access) but we cannot make a general assumption that that is
	 * okay; for example there are platforms where nearly all users
	 * customarily belong to the same group.  Perhaps this test should be
	 * configurable.
1186
	 *
1187 1188
	 * XXX temporarily suppress check when on Windows, because there may not
	 * be proper support for Unix-y file permissions.  Need to think of a
1189 1190
	 * reasonable check to apply on Windows.
	 */
1191
#if !defined(WIN32) && !defined(__CYGWIN__)
1192 1193 1194 1195
	if (stat_buf.st_mode & (S_IRWXG | S_IRWXO))
		ereport(FATAL,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("data directory \"%s\" has group or world access",
1196
						DataDir),
1197 1198 1199 1200
				 errdetail("Permissions should be u=rwx (0700).")));
#endif

	/* Look for PG_VERSION before looking for pg_control */
1201
	ValidatePgVersion(DataDir);
1202

1203
	snprintf(path, sizeof(path), "%s/global/pg_control", DataDir);
1204 1205 1206 1207

	fp = AllocateFile(path, PG_BINARY_R);
	if (fp == NULL)
	{
1208 1209 1210
		write_stderr("%s: could not find the database system\n"
					 "Expected to find it in the directory \"%s\",\n"
					 "but could not open file \"%s\": %s\n",
1211
					 progname, DataDir, path, strerror(errno));
1212 1213 1214 1215 1216 1217 1218
		ExitPostmaster(2);
	}
	FreeFile(fp);
}


/*
1219 1220 1221 1222 1223
 * Fork away from the controlling terminal (silent_mode option)
 *
 * Since this requires disconnecting from stdin/stdout/stderr (in case they're
 * linked to the terminal), we re-point stdin to /dev/null and stdout/stderr
 * to "postmaster.log" in the data directory, where we're already chdir'd.
1224
 */
1225
static void
1226
pmdaemonize(void)
1227
{
1228
#ifndef WIN32
1229 1230 1231
	const char *pmlogname = "postmaster.log";
	int			dvnull;
	int			pmlog;
1232
	pid_t		pid;
1233
	int			res;
B
Bruce Momjian 已提交
1234

1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258
	/*
	 * Make sure we can open the files we're going to redirect to.  If this
	 * fails, we want to complain before disconnecting.  Mention the full path
	 * of the logfile in the error message, even though we address it by
	 * relative path.
	 */
	dvnull = open(DEVNULL, O_RDONLY, 0);
	if (dvnull < 0)
	{
		write_stderr("%s: could not open file \"%s\": %s\n",
					 progname, DEVNULL, strerror(errno));
		ExitPostmaster(1);
	}
	pmlog = open(pmlogname, O_CREAT | O_WRONLY | O_APPEND, 0600);
	if (pmlog < 0)
	{
		write_stderr("%s: could not open log file \"%s/%s\": %s\n",
					 progname, DataDir, pmlogname, strerror(errno));
		ExitPostmaster(1);
	}

	/*
	 * Okay to fork.
	 */
1259
	pid = fork_process();
1260
	if (pid == (pid_t) -1)
1261
	{
1262 1263
		write_stderr("%s: could not fork background process: %s\n",
					 progname, strerror(errno));
1264
		ExitPostmaster(1);
1265 1266 1267
	}
	else if (pid)
	{							/* parent */
1268 1269
		/* Parent should just exit, without doing any atexit cleanup */
		_exit(0);
1270
	}
T
Tatsuo Ishii 已提交
1271

B
Bruce Momjian 已提交
1272
	MyProcPid = PostmasterPid = getpid();		/* reset PID vars to child */
1273

1274 1275
	MyStartTime = time(NULL);

1276
	/*
1277 1278
	 * Some systems use setsid() to dissociate from the TTY's process group,
	 * while on others it depends on stdin/stdout/stderr.  Do both if possible.
1279
	 */
1280
#ifdef HAVE_SETSID
1281 1282
	if (setsid() < 0)
	{
1283 1284
		write_stderr("%s: could not dissociate from controlling TTY: %s\n",
					 progname, strerror(errno));
1285
		ExitPostmaster(1);
1286
	}
1287
#endif
1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304

	/*
	 * Reassociate stdin/stdout/stderr.  fork_process() cleared any pending
	 * output, so this should be safe.  The only plausible error is EINTR,
	 * which just means we should retry.
	 */
	do {
		res = dup2(dvnull, 0);
	} while (res < 0 && errno == EINTR);
	close(dvnull);
	do {
		res = dup2(pmlog, 1);
	} while (res < 0 && errno == EINTR);
	do {
		res = dup2(pmlog, 2);
	} while (res < 0 && errno == EINTR);
	close(pmlog);
B
Bruce Momjian 已提交
1305
#else							/* WIN32 */
1306
	/* not supported */
1307
	elog(FATAL, "silent_mode is not supported under Windows");
B
Bruce Momjian 已提交
1308
#endif   /* WIN32 */
1309 1310
}

1311

1312
/*
1313
 * Main idle loop of postmaster
1314
 */
1315
static int
1316
ServerLoop(void)
1317
{
B
Bruce Momjian 已提交
1318
	fd_set		readmask;
1319
	int			nSockets;
1320 1321
	time_t		now,
				last_touch_time;
1322

1323
	last_touch_time = time(NULL);
1324

1325
	nSockets = initMasks(&readmask);
1326 1327 1328

	for (;;)
	{
1329
		fd_set		rmask;
1330
		int			selres;
1331 1332

		/*
1333
		 * Wait for a connection request to arrive.
1334
		 *
1335 1336
		 * We wait at most one minute, to ensure that the other background
		 * tasks handled below get done even when no requests are arriving.
1337
		 *
B
Bruce Momjian 已提交
1338 1339 1340
		 * If we are in PM_WAIT_DEAD_END state, then we don't want to accept
		 * any new connections, so we don't call select() at all; just sleep
		 * for a little bit with signals unblocked.
1341
		 */
1342 1343 1344
		memcpy((char *) &rmask, (char *) &readmask, sizeof(fd_set));

		PG_SETMASK(&UnBlockSig);
1345

1346 1347
		if (pmState == PM_WAIT_DEAD_END)
		{
B
Bruce Momjian 已提交
1348
			pg_usleep(100000L); /* 100 msec seems reasonable */
1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360
			selres = 0;
		}
		else
		{
			/* must set timeout each time; some OSes change it! */
			struct timeval timeout;

			timeout.tv_sec = 60;
			timeout.tv_usec = 0;

			selres = select(nSockets, &rmask, NULL, NULL, &timeout);
		}
J
Jan Wieck 已提交
1361

1362
		/*
B
Bruce Momjian 已提交
1363 1364
		 * Block all signals until we wait again.  (This makes it safe for our
		 * signal handlers to do nontrivial work.)
1365
		 */
1366
		PG_SETMASK(&BlockSig);
1367

1368
		/* Now check the select() result */
1369
		if (selres < 0)
1370
		{
1371 1372 1373 1374 1375 1376 1377
			if (errno != EINTR && errno != EWOULDBLOCK)
			{
				ereport(LOG,
						(errcode_for_socket_access(),
						 errmsg("select() failed in postmaster: %m")));
				return STATUS_ERROR;
			}
1378
		}
1379 1380

		/*
B
Bruce Momjian 已提交
1381 1382
		 * New connection pending on any of our sockets? If so, fork a child
		 * process to deal with it.
1383
		 */
1384
		if (selres > 0)
1385
		{
1386 1387
			int			i;

1388
			for (i = 0; i < MAXLISTEN; i++)
1389
			{
1390 1391 1392
				if (ListenSocket[i] == -1)
					break;
				if (FD_ISSET(ListenSocket[i], &rmask))
B
Bruce Momjian 已提交
1393
				{
1394 1395
					Port	   *port;

1396 1397 1398 1399 1400 1401
					port = ConnCreate(ListenSocket[i]);
					if (port)
					{
						BackendStartup(port);

						/*
B
Bruce Momjian 已提交
1402 1403
						 * We no longer need the open socket or port structure
						 * in this process
1404 1405 1406 1407
						 */
						StreamClose(port->sock);
						ConnFree(port);
					}
B
Bruce Momjian 已提交
1408
				}
1409
			}
1410
		}
1411

1412 1413
		/* If we have lost the log collector, try to start a new one */
		if (SysLoggerPID == 0 && Logging_collector)
1414 1415
			SysLoggerPID = SysLogger_Start();

1416
		/*
B
Bruce Momjian 已提交
1417 1418
		 * If no background writer process is running, and we are not in a
		 * state that prevents it, start one.  It doesn't matter if this
1419 1420
		 * fails, we'll just try again later.
		 */
1421
		if (BgWriterPID == 0 &&
1422
			(pmState == PM_RUN || pmState == PM_RECOVERY ||
1423
			 pmState == PM_RECOVERY_CONSISTENT))
1424 1425
			BgWriterPID = StartBackgroundWriter();

1426
		/*
B
Bruce Momjian 已提交
1427 1428
		 * Likewise, if we have lost the walwriter process, try to start a new
		 * one.
1429
		 */
1430
		if (WalWriterPID == 0 && pmState == PM_RUN)
1431 1432
			WalWriterPID = StartWalWriter();

1433
		/* If we have lost the autovacuum launcher, try to start a new one */
1434 1435
		if (AutoVacPID == 0 &&
			(AutoVacuumingActive() || start_autovac_launcher) &&
1436
			pmState == PM_RUN)
1437
		{
1438
			AutoVacPID = StartAutoVacLauncher();
1439
			if (AutoVacPID != 0)
B
Bruce Momjian 已提交
1440
				start_autovac_launcher = false; /* signal processed */
1441
		}
1442

1443 1444
		/* If we have lost the archiver, try to start a new one */
		if (XLogArchivingActive() && PgArchPID == 0 && pmState == PM_RUN)
1445
			PgArchPID = pgarch_start();
B
Bruce Momjian 已提交
1446

1447
		/* If we have lost the stats collector, try to start a new one */
1448
		if (PgStatPID == 0 && pmState == PM_RUN)
1449
			PgStatPID = pgstat_start();
1450

1451 1452 1453 1454 1455
		/* If we need to signal the autovacuum launcher, do so now */
		if (avlauncher_needs_signal)
		{
			avlauncher_needs_signal = false;
			if (AutoVacPID != 0)
1456
				kill(AutoVacPID, SIGUSR2);
1457 1458
		}

1459
		/*
B
Bruce Momjian 已提交
1460 1461 1462
		 * Touch the socket and lock file every 58 minutes, to ensure that
		 * they are not removed by overzealous /tmp-cleaning tasks.  We assume
		 * no one runs cleaners with cutoff times of less than an hour ...
1463 1464
		 */
		now = time(NULL);
1465
		if (now - last_touch_time >= 58 * SECS_PER_MINUTE)
1466 1467 1468 1469 1470
		{
			TouchSocketFile();
			TouchSocketLockFile();
			last_touch_time = now;
		}
1471
	}
1472 1473
}

1474 1475

/*
1476 1477
 * Initialise the masks for select() for the ports we are listening on.
 * Return the number of sockets to listen on.
1478
 */
1479
static int
1480
initMasks(fd_set *rmask)
1481
{
1482
	int			maxsock = -1;
B
Bruce Momjian 已提交
1483
	int			i;
1484 1485 1486

	FD_ZERO(rmask);

B
Bruce Momjian 已提交
1487
	for (i = 0; i < MAXLISTEN; i++)
1488
	{
B
Bruce Momjian 已提交
1489
		int			fd = ListenSocket[i];
1490 1491 1492

		if (fd == -1)
			break;
1493 1494
		FD_SET		(fd, rmask);

1495 1496
		if (fd > maxsock)
			maxsock = fd;
1497
	}
1498

1499
	return maxsock + 1;
1500 1501
}

1502 1503

/*
1504
 * Read a client's startup packet and do something according to it.
1505
 *
1506
 * Returns STATUS_OK or STATUS_ERROR, or might call ereport(FATAL) and
1507
 * not return at all.
1508
 *
1509
 * (Note that ereport(FATAL) stuff is sent to the client, so only use it
1510 1511 1512
 * if that's what you want.  Return STATUS_ERROR if you don't want to
 * send anything to the client, which would typically be appropriate
 * if we detect a communications failure.)
1513
 */
M
 
Marc G. Fournier 已提交
1514
static int
1515
ProcessStartupPacket(Port *port, bool SSLdone)
1516
{
1517 1518
	int32		len;
	void	   *buf;
1519 1520
	ProtocolVersion proto;
	MemoryContext oldcontext;
1521

1522 1523
	if (pq_getbytes((char *) &len, 4) == EOF)
	{
1524 1525
		/*
		 * EOF after SSLdone probably means the client didn't like our
B
Bruce Momjian 已提交
1526 1527
		 * response to NEGOTIATE_SSL_CODE.	That's not an error condition, so
		 * don't clutter the log with a complaint.
1528 1529
		 */
		if (!SSLdone)
1530 1531 1532
			ereport(COMMERROR,
					(errcode(ERRCODE_PROTOCOL_VIOLATION),
					 errmsg("incomplete startup packet")));
1533 1534 1535
		return STATUS_ERROR;
	}

1536 1537 1538
	len = ntohl(len);
	len -= 4;

1539 1540
	if (len < (int32) sizeof(ProtocolVersion) ||
		len > MAX_STARTUP_PACKET_LENGTH)
1541
	{
1542 1543 1544
		ereport(COMMERROR,
				(errcode(ERRCODE_PROTOCOL_VIOLATION),
				 errmsg("invalid length of startup packet")));
1545 1546
		return STATUS_ERROR;
	}
1547

1548 1549
	/*
	 * Allocate at least the size of an old-style startup packet, plus one
B
Bruce Momjian 已提交
1550 1551 1552
	 * extra byte, and make sure all are zeroes.  This ensures we will have
	 * null termination of all strings, in both fixed- and variable-length
	 * packet layouts.
1553 1554 1555 1556 1557
	 */
	if (len <= (int32) sizeof(StartupPacket))
		buf = palloc0(sizeof(StartupPacket) + 1);
	else
		buf = palloc0(len + 1);
1558 1559 1560

	if (pq_getbytes(buf, len) == EOF)
	{
1561 1562 1563
		ereport(COMMERROR,
				(errcode(ERRCODE_PROTOCOL_VIOLATION),
				 errmsg("incomplete startup packet")));
1564 1565
		return STATUS_ERROR;
	}
1566

1567 1568 1569
	/*
	 * The first field is either a protocol version number or a special
	 * request code.
M
 
Marc G. Fournier 已提交
1570
	 */
1571
	port->proto = proto = ntohl(*((ProtocolVersion *) buf));
M
 
Marc G. Fournier 已提交
1572

1573
	if (proto == CANCEL_REQUEST_CODE)
1574
	{
1575
		processCancelRequest(port, buf);
1576 1577
		/* Not really an error, but we don't want to proceed further */
		return STATUS_ERROR;
1578
	}
M
 
Marc G. Fournier 已提交
1579

1580
	if (proto == NEGOTIATE_SSL_CODE && !SSLdone)
1581 1582 1583
	{
		char		SSLok;

1584
#ifdef USE_SSL
1585
		/* No SSL when disabled or on Unix sockets */
1586
		if (!EnableSSL || IS_AF_UNIX(port->laddr.addr.ss_family))
1587
			SSLok = 'N';
B
Bruce Momjian 已提交
1588
		else
1589
			SSLok = 'S';		/* Support for SSL */
1590
#else
1591
		SSLok = 'N';			/* No support for SSL */
1592
#endif
1593 1594

retry1:
1595 1596
		if (send(port->sock, &SSLok, 1, 0) != 1)
		{
1597 1598
			if (errno == EINTR)
				goto retry1;	/* if interrupted, just retry */
1599 1600
			ereport(COMMERROR,
					(errcode_for_socket_access(),
B
Bruce Momjian 已提交
1601
					 errmsg("failed to send SSL negotiation response: %m")));
1602
			return STATUS_ERROR;	/* close the connection */
1603 1604
		}

1605
#ifdef USE_SSL
B
Bruce Momjian 已提交
1606
		if (SSLok == 'S' && secure_open_server(port) == -1)
B
Bruce Momjian 已提交
1607
			return STATUS_ERROR;
1608
#endif
1609 1610 1611
		/* regular startup packet, cancel, etc packet should follow... */
		/* but not another SSL negotiation request */
		return ProcessStartupPacket(port, true);
1612
	}
1613

M
 
Marc G. Fournier 已提交
1614 1615
	/* Could add additional special packet types here */

1616
	/*
B
Bruce Momjian 已提交
1617 1618
	 * Set FrontendProtocol now so that ereport() knows what format to send if
	 * we fail during startup.
1619 1620
	 */
	FrontendProtocol = proto;
1621

M
 
Marc G. Fournier 已提交
1622 1623
	/* Check we can handle the protocol the frontend is using. */

1624
	if (PG_PROTOCOL_MAJOR(proto) < PG_PROTOCOL_MAJOR(PG_PROTOCOL_EARLIEST) ||
B
Bruce Momjian 已提交
1625 1626 1627
		PG_PROTOCOL_MAJOR(proto) > PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST) ||
		(PG_PROTOCOL_MAJOR(proto) == PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST) &&
		 PG_PROTOCOL_MINOR(proto) > PG_PROTOCOL_MINOR(PG_PROTOCOL_LATEST)))
1628 1629 1630
		ereport(FATAL,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("unsupported frontend protocol %u.%u: server supports %u.0 to %u.%u",
B
Bruce Momjian 已提交
1631
						PG_PROTOCOL_MAJOR(proto), PG_PROTOCOL_MINOR(proto),
1632 1633 1634
						PG_PROTOCOL_MAJOR(PG_PROTOCOL_EARLIEST),
						PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST),
						PG_PROTOCOL_MINOR(PG_PROTOCOL_LATEST))));
M
 
Marc G. Fournier 已提交
1635

B
Bruce Momjian 已提交
1636
	/*
B
Bruce Momjian 已提交
1637 1638
	 * Now fetch parameters out of startup packet and save them into the Port
	 * structure.  All data structures attached to the Port struct must be
1639 1640 1641 1642
	 * allocated in TopMemoryContext so that they will remain available in
	 * a running backend (even after PostmasterContext is destroyed).  We need
	 * not worry about leaking this storage on failure, since we aren't in the
	 * postmaster process anymore.
B
Bruce Momjian 已提交
1643
	 */
1644 1645 1646 1647
	oldcontext = MemoryContextSwitchTo(TopMemoryContext);

	if (PG_PROTOCOL_MAJOR(proto) >= 3)
	{
B
Bruce Momjian 已提交
1648
		int32		offset = sizeof(ProtocolVersion);
1649 1650

		/*
B
Bruce Momjian 已提交
1651 1652 1653
		 * Scan packet body for name/option pairs.	We can assume any string
		 * beginning within the packet body is null-terminated, thanks to
		 * zeroing extra byte above.
1654 1655 1656 1657 1658
		 */
		port->guc_options = NIL;

		while (offset < len)
		{
B
Bruce Momjian 已提交
1659 1660 1661
			char	   *nameptr = ((char *) buf) + offset;
			int32		valoffset;
			char	   *valptr;
1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685

			if (*nameptr == '\0')
				break;			/* found packet terminator */
			valoffset = offset + strlen(nameptr) + 1;
			if (valoffset >= len)
				break;			/* missing value, will complain below */
			valptr = ((char *) buf) + valoffset;

			if (strcmp(nameptr, "database") == 0)
				port->database_name = pstrdup(valptr);
			else if (strcmp(nameptr, "user") == 0)
				port->user_name = pstrdup(valptr);
			else if (strcmp(nameptr, "options") == 0)
				port->cmdline_options = pstrdup(valptr);
			else
			{
				/* Assume it's a generic GUC option */
				port->guc_options = lappend(port->guc_options,
											pstrdup(nameptr));
				port->guc_options = lappend(port->guc_options,
											pstrdup(valptr));
			}
			offset = valoffset + strlen(valptr) + 1;
		}
B
Bruce Momjian 已提交
1686

1687 1688 1689 1690
		/*
		 * If we didn't find a packet terminator exactly at the end of the
		 * given packet length, complain.
		 */
B
Bruce Momjian 已提交
1691
		if (offset != len - 1)
1692 1693 1694
			ereport(FATAL,
					(errcode(ERRCODE_PROTOCOL_VIOLATION),
					 errmsg("invalid startup packet layout: expected terminator as last byte")));
1695 1696 1697 1698
	}
	else
	{
		/*
B
Bruce Momjian 已提交
1699 1700 1701 1702
		 * Get the parameters from the old-style, fixed-width-fields startup
		 * packet as C strings.  The packet destination was cleared first so a
		 * short packet has zeros silently added.  We have to be prepared to
		 * truncate the pstrdup result for oversize fields, though.
1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716
		 */
		StartupPacket *packet = (StartupPacket *) buf;

		port->database_name = pstrdup(packet->database);
		if (strlen(port->database_name) > sizeof(packet->database))
			port->database_name[sizeof(packet->database)] = '\0';
		port->user_name = pstrdup(packet->user);
		if (strlen(port->user_name) > sizeof(packet->user))
			port->user_name[sizeof(packet->user)] = '\0';
		port->cmdline_options = pstrdup(packet->options);
		if (strlen(port->cmdline_options) > sizeof(packet->options))
			port->cmdline_options[sizeof(packet->options)] = '\0';
		port->guc_options = NIL;
	}
1717

1718
	/* Check a user name was given. */
1719
	if (port->user_name == NULL || port->user_name[0] == '\0')
1720 1721
		ereport(FATAL,
				(errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION),
B
Bruce Momjian 已提交
1722
			 errmsg("no PostgreSQL user name specified in startup packet")));
1723

1724 1725 1726 1727
	/* The database defaults to the user name. */
	if (port->database_name == NULL || port->database_name[0] == '\0')
		port->database_name = pstrdup(port->user_name);

1728
	if (Db_user_namespace)
B
Bruce Momjian 已提交
1729
	{
1730
		/*
B
Bruce Momjian 已提交
1731 1732 1733 1734
		 * If user@, it is a global user, remove '@'. We only want to do this
		 * if there is an '@' at the end and no earlier in the user string or
		 * they may fake as a local user of another database attaching to this
		 * database.
1735
		 */
1736 1737 1738
		if (strchr(port->user_name, '@') ==
			port->user_name + strlen(port->user_name) - 1)
			*strchr(port->user_name, '@') = '\0';
1739 1740 1741
		else
		{
			/* Append '@' and dbname */
1742
			char	   *db_user;
B
Bruce Momjian 已提交
1743

1744 1745 1746 1747
			db_user = palloc(strlen(port->user_name) +
							 strlen(port->database_name) + 2);
			sprintf(db_user, "%s@%s", port->user_name, port->database_name);
			port->user_name = db_user;
1748 1749 1750
		}
	}

1751
	/*
B
Bruce Momjian 已提交
1752 1753
	 * Truncate given database and user names to length of a Postgres name.
	 * This avoids lookup failures when overlength names are given.
1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764
	 */
	if (strlen(port->database_name) >= NAMEDATALEN)
		port->database_name[NAMEDATALEN - 1] = '\0';
	if (strlen(port->user_name) >= NAMEDATALEN)
		port->user_name[NAMEDATALEN - 1] = '\0';

	/*
	 * Done putting stuff in TopMemoryContext.
	 */
	MemoryContextSwitchTo(oldcontext);

1765
	/*
B
Bruce Momjian 已提交
1766 1767 1768
	 * If we're going to reject the connection due to database state, say so
	 * now instead of wasting cycles on an authentication exchange. (This also
	 * allows a pg_ping utility to be written.)
1769
	 */
1770
	switch (port->canAcceptConnections)
1771 1772
	{
		case CAC_STARTUP:
1773 1774 1775
			ereport(FATAL,
					(errcode(ERRCODE_CANNOT_CONNECT_NOW),
					 errmsg("the database system is starting up")));
1776 1777
			break;
		case CAC_SHUTDOWN:
1778 1779 1780
			ereport(FATAL,
					(errcode(ERRCODE_CANNOT_CONNECT_NOW),
					 errmsg("the database system is shutting down")));
1781 1782
			break;
		case CAC_RECOVERY:
1783 1784 1785
			ereport(FATAL,
					(errcode(ERRCODE_CANNOT_CONNECT_NOW),
					 errmsg("the database system is in recovery mode")));
1786 1787
			break;
		case CAC_TOOMANY:
1788 1789 1790
			ereport(FATAL,
					(errcode(ERRCODE_TOO_MANY_CONNECTIONS),
					 errmsg("sorry, too many clients already")));
1791
			break;
1792 1793 1794
		case CAC_WAITBACKUP:
			/* OK for now, will check in InitPostgres */
			break;
1795
		case CAC_OK:
1796
			break;
1797
	}
1798

1799
	return STATUS_OK;
M
 
Marc G. Fournier 已提交
1800 1801
}

1802

M
 
Marc G. Fournier 已提交
1803 1804
/*
 * The client has sent a cancel request packet, not a normal
1805 1806
 * start-a-new-connection packet.  Perform the necessary processing.
 * Nothing is sent back to the client.
M
 
Marc G. Fournier 已提交
1807
 */
1808 1809
static void
processCancelRequest(Port *port, void *pkt)
M
 
Marc G. Fournier 已提交
1810
{
1811
	CancelRequestPacket *canc = (CancelRequestPacket *) pkt;
M
 
Marc G. Fournier 已提交
1812 1813
	int			backendPID;
	long		cancelAuthCode;
1814
	Backend    *bp;
B
Bruce Momjian 已提交
1815

1816 1817 1818
#ifndef EXEC_BACKEND
	Dlelem	   *curr;
#else
B
Bruce Momjian 已提交
1819
	int			i;
1820
#endif
M
 
Marc G. Fournier 已提交
1821 1822 1823 1824

	backendPID = (int) ntohl(canc->backendPID);
	cancelAuthCode = (long) ntohl(canc->cancelAuthCode);

1825
	/*
B
Bruce Momjian 已提交
1826 1827 1828
	 * See if we have a matching backend.  In the EXEC_BACKEND case, we can no
	 * longer access the postmaster's own backend list, and must rely on the
	 * duplicate array in shared memory.
1829
	 */
1830
#ifndef EXEC_BACKEND
M
 
Marc G. Fournier 已提交
1831 1832 1833
	for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
	{
		bp = (Backend *) DLE_VAL(curr);
1834
#else
1835
	for (i = MaxLivePostmasterChildren() - 1; i >= 0; i--)
1836
	{
B
Bruce Momjian 已提交
1837
		bp = (Backend *) &ShmemBackendArray[i];
1838
#endif
M
 
Marc G. Fournier 已提交
1839 1840 1841 1842 1843
		if (bp->pid == backendPID)
		{
			if (bp->cancel_key == cancelAuthCode)
			{
				/* Found a match; signal that backend to cancel current op */
1844 1845 1846
				ereport(DEBUG2,
						(errmsg_internal("processing cancel request: sending SIGINT to process %d",
										 backendPID)));
1847
				signal_child(bp->pid, SIGINT);
M
 
Marc G. Fournier 已提交
1848 1849 1850
			}
			else
				/* Right PID, wrong key: no way, Jose */
1851
				ereport(LOG,
P
Peter Eisentraut 已提交
1852
						(errmsg("wrong key in cancel request for process %d",
1853
								backendPID)));
1854
			return;
M
 
Marc G. Fournier 已提交
1855 1856 1857 1858
		}
	}

	/* No matching backend */
1859
	ereport(LOG,
P
Peter Eisentraut 已提交
1860
			(errmsg("PID %d in cancel request did not match any process",
1861
					backendPID)));
1862 1863
}

1864 1865 1866
/*
 * canAcceptConnections --- check to see if database state allows connections.
 */
1867
static enum CAC_state
1868 1869
canAcceptConnections(void)
{
1870 1871
	/*
	 * Can't start backends when in startup/shutdown/recovery state.
1872 1873 1874 1875
	 *
	 * In state PM_WAIT_BACKUP only superusers can connect (this must be
	 * allowed so that a superuser can end online backup mode); we return
	 * CAC_WAITBACKUP code to indicate that this must be checked later.
1876
	 */
1877
	if (pmState != PM_RUN)
1878
	{
1879
		if (pmState == PM_WAIT_BACKUP)
1880
			return CAC_WAITBACKUP;		/* allow superusers only */
1881
		if (Shutdown > NoShutdown)
B
Bruce Momjian 已提交
1882
			return CAC_SHUTDOWN;	/* shutdown is pending */
1883 1884 1885 1886
		if (!FatalError &&
			(pmState == PM_STARTUP ||
			 pmState == PM_RECOVERY ||
			 pmState == PM_RECOVERY_CONSISTENT))
B
Bruce Momjian 已提交
1887 1888
			return CAC_STARTUP; /* normal startup */
		return CAC_RECOVERY;	/* else must be crash recovery */
1889
	}
1890

1891 1892 1893
	/*
	 * Don't start too many children.
	 *
1894
	 * We allow more connections than we can have backends here because some
B
Bruce Momjian 已提交
1895 1896 1897 1898
	 * might still be authenticating; they might fail auth, or some existing
	 * backend might exit before the auth cycle is completed. The exact
	 * MaxBackends limit is enforced when a new backend tries to join the
	 * shared-inval backend array.
1899
	 *
1900 1901
	 * The limit here must match the sizes of the per-child-process arrays;
	 * see comments for MaxLivePostmasterChildren().
1902
	 */
1903
	if (CountChildren() >= MaxLivePostmasterChildren())
1904
		return CAC_TOOMANY;
1905

1906
	return CAC_OK;
1907
}
1908

1909

1910 1911 1912
/*
 * ConnCreate -- create a local connection data structure
 */
1913 1914
static Port *
ConnCreate(int serverFd)
1915
{
1916
	Port	   *port;
1917 1918 1919

	if (!(port = (Port *) calloc(1, sizeof(Port))))
	{
1920 1921 1922
		ereport(LOG,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of memory")));
1923 1924 1925
		ExitPostmaster(1);
	}

1926
	if (StreamConnection(serverFd, port) != STATUS_OK)
1927
	{
1928 1929
		if (port->sock >= 0)
			StreamClose(port->sock);
1930
		ConnFree(port);
1931
		port = NULL;
1932 1933 1934
	}
	else
	{
1935
		/*
B
Bruce Momjian 已提交
1936 1937 1938 1939 1940
		 * Precompute password salt values to use for this connection. It's
		 * slightly annoying to do this long in advance of knowing whether
		 * we'll need 'em or not, but we must do the random() calls before we
		 * fork, not after.  Else the postmaster's random sequence won't get
		 * advanced, and all backends would end up using the same salt...
1941
		 */
1942
		RandomSalt(port->md5Salt);
1943 1944
	}

1945
	/*
B
Bruce Momjian 已提交
1946
	 * Allocate GSSAPI specific state struct
1947
	 */
1948
#ifndef EXEC_BACKEND
B
Bruce Momjian 已提交
1949 1950
#if defined(ENABLE_GSS) || defined(ENABLE_SSPI)
	port->gss = (pg_gssinfo *) calloc(1, sizeof(pg_gssinfo));
1951 1952 1953 1954 1955 1956 1957
	if (!port->gss)
	{
		ereport(LOG,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of memory")));
		ExitPostmaster(1);
	}
1958
#endif
1959 1960
#endif

1961
	return port;
1962 1963
}

1964

1965
/*
1966
 * ConnFree -- free a local connection data structure
1967
 */
1968
static void
1969
ConnFree(Port *conn)
1970 1971
{
#ifdef USE_SSL
B
Bruce Momjian 已提交
1972
	secure_close(conn);
1973
#endif
1974 1975
	if (conn->gss)
		free(conn->gss);
1976 1977 1978
	free(conn);
}

1979

1980 1981 1982 1983
/*
 * ClosePostmasterPorts -- close all the postmaster's open sockets
 *
 * This is called during child process startup to release file descriptors
1984 1985
 * that are not needed by that child process.  The postmaster still has
 * them open, of course.
1986 1987 1988
 *
 * Note: we pass am_syslogger as a boolean because we don't want to set
 * the global variable yet when this is called.
1989
 */
1990
void
1991
ClosePostmasterPorts(bool am_syslogger)
1992
{
B
Bruce Momjian 已提交
1993
	int			i;
B
Bruce Momjian 已提交
1994

1995
	/* Close the listen sockets */
B
Bruce Momjian 已提交
1996 1997 1998 1999 2000 2001 2002 2003
	for (i = 0; i < MAXLISTEN; i++)
	{
		if (ListenSocket[i] != -1)
		{
			StreamClose(ListenSocket[i]);
			ListenSocket[i] = -1;
		}
	}
2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017

	/* If using syslogger, close the read side of the pipe */
	if (!am_syslogger)
	{
#ifndef WIN32
		if (syslogPipe[0] >= 0)
			close(syslogPipe[0]);
		syslogPipe[0] = -1;
#else
		if (syslogPipe[0])
			CloseHandle(syslogPipe[0]);
		syslogPipe[0] = 0;
#endif
	}
2018 2019 2020 2021 2022 2023

#ifdef USE_BONJOUR
	/* If using Bonjour, close the connection to the mDNS daemon */
	if (bonjour_sdref)
		close(DNSServiceRefSockFD(bonjour_sdref));
#endif
2024 2025
}

2026

2027 2028 2029 2030
/*
 * reset_shared -- (re)initialize shared memory and semaphores
 *
 * port is the postmaster's port number, passed straight through to
 * CreateSharedMemoryAndSemaphores().
 */
static void
reset_shared(int port)
{
	/*
	 * Create the shared memory segment and semaphores, or re-create them if
	 * they already exist.
	 *
	 * The IPC keys (when SysV shmem and/or semaphores are in use) are
	 * derived from the port number, so every "cycle of life" normally ends
	 * up with the same keys.  That helps ensure dead IPC objects get cleaned
	 * up if the postmaster crashes and is restarted.
	 */
	CreateSharedMemoryAndSemaphores(false, port);
}

2044 2045

/*
T
Tom Lane 已提交
2046
 * SIGHUP -- reread config files, and tell children to do same
2047 2048
 */
static void
2049
SIGHUP_handler(SIGNAL_ARGS)
2050
{
2051 2052
	int			save_errno = errno;

2053 2054 2055 2056
	PG_SETMASK(&BlockSig);

	if (Shutdown <= SmartShutdown)
	{
2057
		ereport(LOG,
B
Bruce Momjian 已提交
2058
				(errmsg("received SIGHUP, reloading configuration files")));
2059
		ProcessConfigFile(PGC_SIGHUP);
2060
		SignalChildren(SIGHUP);
2061 2062
		if (StartupPID != 0)
			signal_child(StartupPID, SIGHUP);
2063
		if (BgWriterPID != 0)
2064
			signal_child(BgWriterPID, SIGHUP);
2065 2066
		if (WalWriterPID != 0)
			signal_child(WalWriterPID, SIGHUP);
2067
		if (AutoVacPID != 0)
2068
			signal_child(AutoVacPID, SIGHUP);
2069
		if (PgArchPID != 0)
2070
			signal_child(PgArchPID, SIGHUP);
2071
		if (SysLoggerPID != 0)
2072
			signal_child(SysLoggerPID, SIGHUP);
2073 2074
		if (PgStatPID != 0)
			signal_child(PgStatPID, SIGHUP);
2075 2076

		/* Reload authentication config files too */
2077 2078 2079 2080
		if (!load_hba())
			ereport(WARNING,
					(errmsg("pg_hba.conf not reloaded")));

B
Bruce Momjian 已提交
2081
		load_ident();
J
Jan Wieck 已提交
2082

2083 2084 2085 2086
#ifdef EXEC_BACKEND
		/* Update the starting-point file for future children */
		write_nondefault_variables(PGC_SIGHUP);
#endif
2087 2088
	}

2089 2090
	PG_SETMASK(&UnBlockSig);

2091
	errno = save_errno;
2092 2093 2094
}


2095
/*
T
Tom Lane 已提交
2096
 * pmdie -- signal handler for processing various postmaster signals.
2097 2098
 */
static void
2099
pmdie(SIGNAL_ARGS)
2100
{
2101 2102
	int			save_errno = errno;

2103
	PG_SETMASK(&BlockSig);
2104

2105 2106 2107
	ereport(DEBUG2,
			(errmsg_internal("postmaster received signal %d",
							 postgres_signal_arg)));
M
 
Marc G. Fournier 已提交
2108

2109
	switch (postgres_signal_arg)
2110
	{
M
 
Marc G. Fournier 已提交
2111
		case SIGTERM:
B
Bruce Momjian 已提交
2112

2113 2114 2115
			/*
			 * Smart Shutdown:
			 *
2116
			 * Wait for children to end their work, then shut down.
2117 2118
			 */
			if (Shutdown >= SmartShutdown)
2119
				break;
2120
			Shutdown = SmartShutdown;
2121 2122
			ereport(LOG,
					(errmsg("received smart shutdown request")));
2123

2124 2125
			if (pmState == PM_RUN || pmState == PM_RECOVERY ||
				pmState == PM_RECOVERY_CONSISTENT)
2126 2127 2128 2129 2130 2131 2132 2133 2134
			{
				/* autovacuum workers are told to shut down immediately */
				SignalAutovacWorkers(SIGTERM);
				/* and the autovac launcher too */
				if (AutoVacPID != 0)
					signal_child(AutoVacPID, SIGTERM);
				/* and the walwriter too */
				if (WalWriterPID != 0)
					signal_child(WalWriterPID, SIGTERM);
2135
				pmState = PM_WAIT_BACKUP;
2136
			}
2137

2138
			/*
2139 2140 2141
			 * Now wait for online backup mode to end and backends to exit.
			 * If that is already the case, PostmasterStateMachine will take
			 * the next step.
2142
			 */
2143
			PostmasterStateMachine();
2144
			break;
2145 2146

		case SIGINT:
B
Bruce Momjian 已提交
2147

2148 2149
			/*
			 * Fast Shutdown:
2150
			 *
2151 2152
			 * Abort all children with SIGTERM (rollback active transactions
			 * and exit) and shut down when they are gone.
2153 2154
			 */
			if (Shutdown >= FastShutdown)
2155
				break;
2156
			Shutdown = FastShutdown;
2157 2158
			ereport(LOG,
					(errmsg("received fast shutdown request")));
2159

2160 2161
			if (StartupPID != 0)
				signal_child(StartupPID, SIGTERM);
2162 2163 2164 2165 2166 2167 2168
			if (pmState == PM_RECOVERY)
			{
				/* only bgwriter is active in this state */
				pmState = PM_WAIT_BACKENDS;
			}
			if (pmState == PM_RUN ||
				pmState == PM_WAIT_BACKUP ||
2169
				pmState == PM_WAIT_BACKENDS ||
2170
				pmState == PM_RECOVERY_CONSISTENT)
2171
			{
2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182
				ereport(LOG,
						(errmsg("aborting any active transactions")));
				/* shut down all backends and autovac workers */
				SignalChildren(SIGTERM);
				/* and the autovac launcher too */
				if (AutoVacPID != 0)
					signal_child(AutoVacPID, SIGTERM);
				/* and the walwriter too */
				if (WalWriterPID != 0)
					signal_child(WalWriterPID, SIGTERM);
				pmState = PM_WAIT_BACKENDS;
M
 
Marc G. Fournier 已提交
2183
			}
2184

2185
			/*
2186 2187
			 * Now wait for backends to exit.  If there are none,
			 * PostmasterStateMachine will take the next step.
2188
			 */
2189
			PostmasterStateMachine();
2190
			break;
2191 2192

		case SIGQUIT:
B
Bruce Momjian 已提交
2193

2194
			/*
2195
			 * Immediate Shutdown:
2196
			 *
T
Tom Lane 已提交
2197
			 * abort all children with SIGQUIT and exit without attempt to
2198
			 * properly shut down data base system.
2199
			 */
2200 2201
			ereport(LOG,
					(errmsg("received immediate shutdown request")));
2202
			SignalChildren(SIGQUIT);
2203
			if (StartupPID != 0)
2204
				signal_child(StartupPID, SIGQUIT);
2205
			if (BgWriterPID != 0)
2206
				signal_child(BgWriterPID, SIGQUIT);
2207 2208
			if (WalWriterPID != 0)
				signal_child(WalWriterPID, SIGQUIT);
2209
			if (AutoVacPID != 0)
2210
				signal_child(AutoVacPID, SIGQUIT);
2211
			if (PgArchPID != 0)
2212
				signal_child(PgArchPID, SIGQUIT);
2213
			if (PgStatPID != 0)
2214
				signal_child(PgStatPID, SIGQUIT);
2215
			ExitPostmaster(0);
2216
			break;
M
 
Marc G. Fournier 已提交
2217 2218
	}

2219 2220 2221
	PG_SETMASK(&UnBlockSig);

	errno = save_errno;
2222 2223 2224
}

/*
2225
 * Reaper -- signal handler to cleanup after a child process dies.
2226 2227
 */
static void
2228
reaper(SIGNAL_ARGS)
2229
{
2230
	int			save_errno = errno;
2231 2232
	int			pid;			/* process id of dead child process */
	int			exitstatus;		/* its exit status */
2233

2234
	/* These macros hide platform variations in getting child status */
2235
#ifdef HAVE_WAITPID
2236
	int			status;			/* child exit status */
B
Bruce Momjian 已提交
2237

2238 2239
#define LOOPTEST()		((pid = waitpid(-1, &status, WNOHANG)) > 0)
#define LOOPHEADER()	(exitstatus = status)
B
Bruce Momjian 已提交
2240
#else							/* !HAVE_WAITPID */
2241
#ifndef WIN32
2242
	union wait	status;			/* child exit status */
B
Bruce Momjian 已提交
2243

2244 2245
#define LOOPTEST()		((pid = wait3(&status, WNOHANG, NULL)) > 0)
#define LOOPHEADER()	(exitstatus = status.w_status)
B
Bruce Momjian 已提交
2246
#else							/* WIN32 */
2247
#define LOOPTEST()		((pid = win32_waitpid(&exitstatus)) > 0)
2248
#define LOOPHEADER()
2249 2250
#endif   /* WIN32 */
#endif   /* HAVE_WAITPID */
2251

2252 2253
	PG_SETMASK(&BlockSig);

2254 2255
	ereport(DEBUG4,
			(errmsg_internal("reaping dead processes")));
2256 2257

	while (LOOPTEST())
2258
	{
2259
		LOOPHEADER();
2260

2261
		/*
2262
		 * Check if this child was a startup process.
2263
		 */
2264
		if (pid == StartupPID)
2265
		{
2266
			StartupPID = 0;
2267

2268 2269
			/*
			 * Unexpected exit of startup process (including FATAL exit)
2270 2271
			 * during PM_STARTUP is treated as catastrophic. There are no
			 * other processes running yet, so we can just exit.
2272
			 */
2273
			if (pmState == PM_STARTUP && !EXIT_STATUS_0(exitstatus))
2274
			{
2275
				LogChildExit(LOG, _("startup process"),
2276
							 pid, exitstatus);
2277
				ereport(LOG,
B
Bruce Momjian 已提交
2278
				(errmsg("aborting startup due to startup process failure")));
2279
				ExitPostmaster(1);
2280
			}
2281

2282
			/*
2283 2284
			 * Startup process exited in response to a shutdown request (or it
			 * completed normally regardless of the shutdown request).
2285 2286 2287 2288 2289 2290 2291 2292
			 */
			if (Shutdown > NoShutdown &&
				(EXIT_STATUS_0(exitstatus) || EXIT_STATUS_1(exitstatus)))
			{
				pmState = PM_WAIT_BACKENDS;
				/* PostmasterStateMachine logic does the rest */
				continue;
			}
2293

2294
			/*
2295
			 * Any unexpected exit (including FATAL exit) of the startup
2296 2297
			 * process is treated as a crash, except that we don't want to
			 * reinitialize.
2298
			 */
2299
			if (!EXIT_STATUS_0(exitstatus))
2300
			{
2301 2302 2303
				RecoveryError = true;
				HandleChildCrash(pid, exitstatus,
								 _("startup process"));
2304 2305
				continue;
			}
2306

2307
			/*
2308
			 * Startup succeeded, commence normal operations
2309
			 */
2310 2311 2312 2313 2314
			FatalError = false;
			pmState = PM_RUN;

			/*
			 * Crank up the background writer, if we didn't do that already
2315
			 * when we entered consistent recovery state.  It doesn't matter
2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335
			 * if this fails, we'll just try again later.
			 */
			if (BgWriterPID == 0)
				BgWriterPID = StartBackgroundWriter();

			/*
			 * Likewise, start other special children as needed.  In a restart
			 * situation, some of them may be alive already.
			 */
			if (WalWriterPID == 0)
				WalWriterPID = StartWalWriter();
			if (AutoVacuumingActive() && AutoVacPID == 0)
				AutoVacPID = StartAutoVacLauncher();
			if (XLogArchivingActive() && PgArchPID == 0)
				PgArchPID = pgarch_start();
			if (PgStatPID == 0)
				PgStatPID = pgstat_start();

			/* at this point we are really open for business */
			ereport(LOG,
2336 2337 2338
				 (errmsg("database system is ready to accept connections")));

			continue;
2339 2340 2341 2342 2343
		}

		/*
		 * Was it the bgwriter?
		 */
2344
		if (pid == BgWriterPID)
2345
		{
2346
			BgWriterPID = 0;
2347
			if (EXIT_STATUS_0(exitstatus) && pmState == PM_SHUTDOWN)
2348
			{
2349
				/*
B
Bruce Momjian 已提交
2350 2351 2352
				 * OK, we saw normal exit of the bgwriter after it's been told
				 * to shut down.  We expect that it wrote a shutdown
				 * checkpoint.	(If for some reason it didn't, recovery will
2353
				 * occur on next postmaster start.)
2354
				 *
2355 2356 2357 2358 2359 2360 2361
				 * At this point we should have no normal backend children
				 * left (else we'd not be in PM_SHUTDOWN state) but we might
				 * have dead_end children to wait for.
				 *
				 * If we have an archiver subprocess, tell it to do a last
				 * archive cycle and quit; otherwise we can go directly to
				 * PM_WAIT_DEAD_END state.
2362
				 */
2363
				Assert(Shutdown > NoShutdown);
2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379

				if (PgArchPID != 0)
				{
					/* Waken archiver for the last time */
					signal_child(PgArchPID, SIGUSR2);
					pmState = PM_SHUTDOWN_2;
				}
				else
					pmState = PM_WAIT_DEAD_END;

				/*
				 * We can also shut down the stats collector now; there's
				 * nothing left for it to do.
				 */
				if (PgStatPID != 0)
					signal_child(PgStatPID, SIGQUIT);
2380
			}
2381
			else
2382
			{
2383 2384 2385 2386 2387 2388
				/*
				 * Any unexpected exit of the bgwriter (including FATAL exit)
				 * is treated as a crash.
				 */
				HandleChildCrash(pid, exitstatus,
								 _("background writer process"));
2389 2390
			}

2391
			continue;
2392
		}
2393

2394
		/*
B
Bruce Momjian 已提交
2395 2396 2397
		 * Was it the wal writer?  Normal exit can be ignored; we'll start a
		 * new one at the next iteration of the postmaster's main loop, if
		 * necessary.  Any other exit condition is treated as a crash.
2398
		 */
2399
		if (pid == WalWriterPID)
2400 2401 2402 2403
		{
			WalWriterPID = 0;
			if (!EXIT_STATUS_0(exitstatus))
				HandleChildCrash(pid, exitstatus,
P
Peter Eisentraut 已提交
2404
								 _("WAL writer process"));
2405 2406 2407
			continue;
		}

2408
		/*
B
Bruce Momjian 已提交
2409 2410 2411 2412
		 * Was it the autovacuum launcher?	Normal exit can be ignored; we'll
		 * start a new one at the next iteration of the postmaster's main
		 * loop, if necessary.	Any other exit condition is treated as a
		 * crash.
2413
		 */
2414
		if (pid == AutoVacPID)
2415 2416
		{
			AutoVacPID = 0;
2417
			if (!EXIT_STATUS_0(exitstatus))
2418
				HandleChildCrash(pid, exitstatus,
2419
								 _("autovacuum launcher process"));
2420 2421 2422
			continue;
		}

2423
		/*
B
Bruce Momjian 已提交
2424 2425
		 * Was it the archiver?  If so, just try to start a new one; no need
		 * to force reset of the rest of the system.  (If fail, we'll try
2426 2427
		 * again in future cycles of the main loop.)  But if we were waiting
		 * for it to shut down, advance to the next shutdown step.
2428
		 */
2429
		if (pid == PgArchPID)
2430 2431
		{
			PgArchPID = 0;
2432
			if (!EXIT_STATUS_0(exitstatus))
2433
				LogChildExit(LOG, _("archiver process"),
2434
							 pid, exitstatus);
2435
			if (XLogArchivingActive() && pmState == PM_RUN)
2436
				PgArchPID = pgarch_start();
2437 2438
			else if (pmState == PM_SHUTDOWN_2)
				pmState = PM_WAIT_DEAD_END;
2439 2440 2441
			continue;
		}

2442
		/*
B
Bruce Momjian 已提交
2443 2444 2445
		 * Was it the statistics collector?  If so, just try to start a new
		 * one; no need to force reset of the rest of the system.  (If fail,
		 * we'll try again in future cycles of the main loop.)
2446
		 */
2447
		if (pid == PgStatPID)
2448 2449
		{
			PgStatPID = 0;
2450
			if (!EXIT_STATUS_0(exitstatus))
2451
				LogChildExit(LOG, _("statistics collector process"),
2452
							 pid, exitstatus);
2453
			if (pmState == PM_RUN)
2454 2455 2456 2457
				PgStatPID = pgstat_start();
			continue;
		}

2458 2459
		/* Was it the system logger?  If so, try to start a new one */
		if (pid == SysLoggerPID)
2460 2461 2462 2463
		{
			SysLoggerPID = 0;
			/* for safety's sake, launch new logger *first* */
			SysLoggerPID = SysLogger_Start();
2464
			if (!EXIT_STATUS_0(exitstatus))
2465
				LogChildExit(LOG, _("system logger process"),
2466 2467 2468 2469
							 pid, exitstatus);
			continue;
		}

2470
		/*
2471
		 * Else do standard backend child cleanup.
2472
		 */
2473
		CleanupBackend(pid, exitstatus);
B
Bruce Momjian 已提交
2474
	}							/* loop over pending child-death reports */
2475

2476 2477 2478 2479 2480
	/*
	 * After cleaning out the SIGCHLD queue, see if we have any state changes
	 * or actions to make.
	 */
	PostmasterStateMachine();
2481

2482
	/* Done with signal handler */
2483 2484
	PG_SETMASK(&UnBlockSig);

2485
	errno = save_errno;
2486 2487
}

2488

2489
/*
2490
 * CleanupBackend -- cleanup after terminated backend.
2491 2492 2493 2494
 *
 * Remove all local state associated with backend.
 */
static void
2495
CleanupBackend(int pid,
B
Bruce Momjian 已提交
2496
			   int exitstatus)	/* child's exit status. */
2497
{
2498
	Dlelem	   *curr;
2499

2500
	LogChildExit(DEBUG2, _("server process"), pid, exitstatus);
2501 2502

	/*
2503 2504
	 * If a backend dies in an ugly way then we must signal all other backends
	 * to quickdie.  If exit status is zero (normal) or one (FATAL exit), we
2505 2506
	 * assume everything is all right and proceed to remove the backend from
	 * the active backend list.
2507
	 */
2508
	if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
2509
	{
2510
		HandleChildCrash(pid, exitstatus, _("server process"));
2511 2512 2513 2514
		return;
	}

	for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
2515
	{
2516 2517 2518
		Backend    *bp = (Backend *) DLE_VAL(curr);

		if (bp->pid == pid)
2519
		{
2520
			if (!bp->dead_end)
2521 2522 2523 2524
			{
				if (!ReleasePostmasterChildSlot(bp->child_slot))
				{
					/*
2525 2526
					 * Uh-oh, the child failed to clean itself up.	Treat as a
					 * crash after all.
2527 2528 2529 2530 2531 2532
					 */
					HandleChildCrash(pid, exitstatus, _("server process"));
					return;
				}
#ifdef EXEC_BACKEND
				ShmemBackendArrayRemove(bp);
2533
#endif
2534
			}
2535 2536 2537 2538
			DLRemove(curr);
			free(bp);
			break;
		}
2539
	}
2540
}
2541

2542
/*
2543 2544
 * HandleChildCrash -- cleanup after failed backend, bgwriter, walwriter,
 * or autovacuum.
2545 2546 2547 2548 2549
 *
 * The objectives here are to clean up our local state about the child
 * process, and to signal all other remaining children to quickdie.
 */
static void
2550
HandleChildCrash(int pid, int exitstatus, const char *procname)
2551 2552 2553 2554
{
	Dlelem	   *curr,
			   *next;
	Backend    *bp;
2555

2556
	/*
B
Bruce Momjian 已提交
2557 2558
	 * Make log entry unless there was a previous crash (if so, nonzero exit
	 * status is to be expected in SIGQUIT response; don't clutter log)
2559
	 */
2560 2561
	if (!FatalError)
	{
2562
		LogChildExit(LOG, procname, pid, exitstatus);
2563
		ereport(LOG,
B
Bruce Momjian 已提交
2564
				(errmsg("terminating any other active server processes")));
2565
	}
2566

2567 2568
	/* Process regular backends */
	for (curr = DLGetHead(BackendList); curr; curr = next)
2569
	{
2570
		next = DLGetSucc(curr);
2571
		bp = (Backend *) DLE_VAL(curr);
2572 2573 2574 2575 2576
		if (bp->pid == pid)
		{
			/*
			 * Found entry for freshly-dead backend, so remove it.
			 */
2577
			if (!bp->dead_end)
2578 2579 2580 2581
			{
				(void) ReleasePostmasterChildSlot(bp->child_slot);
#ifdef EXEC_BACKEND
				ShmemBackendArrayRemove(bp);
2582
#endif
2583
			}
2584 2585 2586 2587 2588
			DLRemove(curr);
			free(bp);
			/* Keep looping so we can signal remaining backends */
		}
		else
2589
		{
2590
			/*
B
Bruce Momjian 已提交
2591 2592
			 * This backend is still alive.  Unless we did so already, tell it
			 * to commit hara-kiri.
2593
			 *
2594 2595 2596 2597
			 * SIGQUIT is the special signal that says exit without proc_exit
			 * and let the user know what's going on. But if SendStop is set
			 * (-s on command line), then we send SIGSTOP instead, so that we
			 * can get core dumps from all backends by hand.
2598 2599 2600
			 *
			 * We could exclude dead_end children here, but at least in the
			 * SIGSTOP case it seems better to include them.
2601 2602 2603
			 */
			if (!FatalError)
			{
2604 2605
				ereport(DEBUG2,
						(errmsg_internal("sending %s to process %d",
B
Bruce Momjian 已提交
2606
										 (SendStop ? "SIGSTOP" : "SIGQUIT"),
2607
										 (int) bp->pid)));
2608
				signal_child(bp->pid, (SendStop ? SIGSTOP : SIGQUIT));
2609
			}
2610 2611 2612
		}
	}

2613 2614 2615 2616 2617 2618 2619 2620 2621
	/* Take care of the startup process too */
	if (pid == StartupPID)
		StartupPID = 0;
	else if (StartupPID != 0 && !FatalError)
	{
		ereport(DEBUG2,
				(errmsg_internal("sending %s to process %d",
								 (SendStop ? "SIGSTOP" : "SIGQUIT"),
								 (int) StartupPID)));
2622
		signal_child(StartupPID, (SendStop ? SIGSTOP : SIGQUIT));
2623 2624
	}

2625 2626
	/* Take care of the bgwriter too */
	if (pid == BgWriterPID)
J
Jan Wieck 已提交
2627
		BgWriterPID = 0;
2628
	else if (BgWriterPID != 0 && !FatalError)
2629
	{
2630 2631 2632 2633
		ereport(DEBUG2,
				(errmsg_internal("sending %s to process %d",
								 (SendStop ? "SIGSTOP" : "SIGQUIT"),
								 (int) BgWriterPID)));
2634
		signal_child(BgWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
2635
	}
2636

2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648
	/* Take care of the walwriter too */
	if (pid == WalWriterPID)
		WalWriterPID = 0;
	else if (WalWriterPID != 0 && !FatalError)
	{
		ereport(DEBUG2,
				(errmsg_internal("sending %s to process %d",
								 (SendStop ? "SIGSTOP" : "SIGQUIT"),
								 (int) WalWriterPID)));
		signal_child(WalWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
	}

2649
	/* Take care of the autovacuum launcher too */
2650 2651 2652 2653 2654 2655 2656 2657
	if (pid == AutoVacPID)
		AutoVacPID = 0;
	else if (AutoVacPID != 0 && !FatalError)
	{
		ereport(DEBUG2,
				(errmsg_internal("sending %s to process %d",
								 (SendStop ? "SIGSTOP" : "SIGQUIT"),
								 (int) AutoVacPID)));
2658
		signal_child(AutoVacPID, (SendStop ? SIGSTOP : SIGQUIT));
2659 2660
	}

2661 2662 2663
	/*
	 * Force a power-cycle of the pgarch process too.  (This isn't absolutely
	 * necessary, but it seems like a good idea for robustness, and it
B
Bruce Momjian 已提交
2664 2665
	 * simplifies the state-machine logic in the case where a shutdown request
	 * arrives during crash processing.)
2666
	 */
2667 2668 2669 2670 2671 2672
	if (PgArchPID != 0 && !FatalError)
	{
		ereport(DEBUG2,
				(errmsg_internal("sending %s to process %d",
								 "SIGQUIT",
								 (int) PgArchPID)));
2673
		signal_child(PgArchPID, SIGQUIT);
2674 2675
	}

2676 2677 2678
	/*
	 * Force a power-cycle of the pgstat process too.  (This isn't absolutely
	 * necessary, but it seems like a good idea for robustness, and it
B
Bruce Momjian 已提交
2679 2680
	 * simplifies the state-machine logic in the case where a shutdown request
	 * arrives during crash processing.)
2681
	 */
2682 2683 2684 2685 2686 2687
	if (PgStatPID != 0 && !FatalError)
	{
		ereport(DEBUG2,
				(errmsg_internal("sending %s to process %d",
								 "SIGQUIT",
								 (int) PgStatPID)));
2688
		signal_child(PgStatPID, SIGQUIT);
2689
		allow_immediate_pgstat_restart();
2690 2691
	}

2692 2693
	/* We do NOT restart the syslogger */

2694
	FatalError = true;
2695
	/* We now transit into a state of waiting for children to die */
2696 2697 2698
	if (pmState == PM_RECOVERY ||
		pmState == PM_RECOVERY_CONSISTENT ||
		pmState == PM_RUN ||
2699 2700
		pmState == PM_WAIT_BACKUP ||
		pmState == PM_SHUTDOWN)
2701
		pmState = PM_WAIT_BACKENDS;
2702 2703
}

2704
/*
 * LogChildExit -- report the death of a child process.
 *
 * lev is the ereport level to log at, procname a (translated) noun phrase
 * naming the kind of child, pid its process id, and exitstatus the raw
 * status collected for it.  The message wording depends on whether the
 * child exited, was killed by a signal, or neither.
 */
static void
LogChildExit(int lev, const char *procname, int pid, int exitstatus)
{
	if (WIFEXITED(exitstatus))
	{
		ereport(lev,

		/*------
		  translator: %s is a noun phrase describing a child process, such as
		  "server process" */
				(errmsg("%s (PID %d) exited with exit code %d",
						procname, pid, WEXITSTATUS(exitstatus))));
		return;
	}

	if (WIFSIGNALED(exitstatus))
	{
#if defined(WIN32)
		ereport(lev,

		/*------
		  translator: %s is a noun phrase describing a child process, such as
		  "server process" */
				(errmsg("%s (PID %d) was terminated by exception 0x%X",
						procname, pid, WTERMSIG(exitstatus)),
				 errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value.")));
#elif defined(HAVE_DECL_SYS_SIGLIST) && HAVE_DECL_SYS_SIGLIST
		ereport(lev,

		/*------
		  translator: %s is a noun phrase describing a child process, such as
		  "server process" */
				(errmsg("%s (PID %d) was terminated by signal %d: %s",
						procname, pid, WTERMSIG(exitstatus),
						WTERMSIG(exitstatus) < NSIG ?
						sys_siglist[WTERMSIG(exitstatus)] : "(unknown)")));
#else
		ereport(lev,

		/*------
		  translator: %s is a noun phrase describing a child process, such as
		  "server process" */
				(errmsg("%s (PID %d) was terminated by signal %d",
						procname, pid, WTERMSIG(exitstatus))));
#endif
		return;
	}

	/* Neither a normal exit nor a signal: report the raw status value */
	ereport(lev,

	/*------
	  translator: %s is a noun phrase describing a child process, such as
	  "server process" */
			(errmsg("%s (PID %d) exited with unrecognized status %d",
					procname, pid, exitstatus)));
}

2757 2758 2759 2760 2761 2762 2763 2764 2765
/*
 * Advance the postmaster's state machine and take actions as appropriate
 *
 * This is common code for pmdie() and reaper(), which receive the signals
 * that might mean we need to change state.
 */
static void
PostmasterStateMachine(void)
{
2766 2767 2768
	if (pmState == PM_WAIT_BACKUP)
	{
		/*
2769
		 * PM_WAIT_BACKUP state ends when online backup mode is not active.
2770 2771 2772 2773 2774
		 */
		if (!BackupInProgress())
			pmState = PM_WAIT_BACKENDS;
	}

2775
	/*
B
Bruce Momjian 已提交
2776 2777
	 * If we are in a state-machine state that implies waiting for backends to
	 * exit, see if they're all gone, and change state if so.
2778 2779 2780 2781 2782 2783
	 */
	if (pmState == PM_WAIT_BACKENDS)
	{
		/*
		 * PM_WAIT_BACKENDS state ends when we have no regular backends
		 * (including autovac workers) and no walwriter or autovac launcher.
B
Bruce Momjian 已提交
2784 2785 2786 2787
		 * If we are doing crash recovery then we expect the bgwriter to exit
		 * too, otherwise not.	The archiver, stats, and syslogger processes
		 * are disregarded since they are not connected to shared memory; we
		 * also disregard dead_end children here.
2788 2789 2790 2791 2792 2793 2794 2795 2796 2797
		 */
		if (CountChildren() == 0 &&
			StartupPID == 0 &&
			(BgWriterPID == 0 || !FatalError) &&
			WalWriterPID == 0 &&
			AutoVacPID == 0)
		{
			if (FatalError)
			{
				/*
B
Bruce Momjian 已提交
2798
				 * Start waiting for dead_end children to die.	This state
2799 2800 2801
				 * change causes ServerLoop to stop creating new ones.
				 */
				pmState = PM_WAIT_DEAD_END;
2802 2803

				/*
2804 2805
				 * We already SIGQUIT'd the archiver and stats processes, if
				 * any, when we entered FatalError state.
2806
				 */
2807 2808 2809 2810
			}
			else
			{
				/*
B
Bruce Momjian 已提交
2811 2812 2813
				 * If we get here, we are proceeding with normal shutdown. All
				 * the regular children are gone, and it's time to tell the
				 * bgwriter to do a shutdown checkpoint.
2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827
				 */
				Assert(Shutdown > NoShutdown);
				/* Start the bgwriter if not running */
				if (BgWriterPID == 0)
					BgWriterPID = StartBackgroundWriter();
				/* And tell it to shut down */
				if (BgWriterPID != 0)
				{
					signal_child(BgWriterPID, SIGUSR2);
					pmState = PM_SHUTDOWN;
				}
				else
				{
					/*
B
Bruce Momjian 已提交
2828 2829 2830 2831
					 * If we failed to fork a bgwriter, just shut down. Any
					 * required cleanup will happen at next restart. We set
					 * FatalError so that an "abnormal shutdown" message gets
					 * logged when we exit.
2832 2833 2834
					 */
					FatalError = true;
					pmState = PM_WAIT_DEAD_END;
2835 2836 2837 2838 2839 2840

					/* Kill the archiver and stats collector too */
					if (PgArchPID != 0)
						signal_child(PgArchPID, SIGQUIT);
					if (PgStatPID != 0)
						signal_child(PgStatPID, SIGQUIT);
2841 2842 2843 2844 2845 2846 2847 2848
				}
			}
		}
	}

	if (pmState == PM_WAIT_DEAD_END)
	{
		/*
B
Bruce Momjian 已提交
2849
		 * PM_WAIT_DEAD_END state ends when the BackendList is entirely empty
2850 2851 2852 2853 2854 2855 2856 2857 2858 2859
		 * (ie, no dead_end children remain), and the archiver and stats
		 * collector are gone too.
		 *
		 * The reason we wait for those two is to protect them against a new
		 * postmaster starting conflicting subprocesses; this isn't an
		 * ironclad protection, but it at least helps in the
		 * shutdown-and-immediately-restart scenario.  Note that they have
		 * already been sent appropriate shutdown signals, either during a
		 * normal state transition leading up to PM_WAIT_DEAD_END, or during
		 * FatalError processing.
2860
		 */
2861 2862
		if (DLGetHead(BackendList) == NULL &&
			PgArchPID == 0 && PgStatPID == 0)
2863 2864 2865 2866 2867 2868
		{
			/* These other guys should be dead already */
			Assert(StartupPID == 0);
			Assert(BgWriterPID == 0);
			Assert(WalWriterPID == 0);
			Assert(AutoVacPID == 0);
2869
			/* syslogger is not considered here */
2870 2871 2872 2873 2874 2875
			pmState = PM_NO_CHILDREN;
		}
	}

	/*
	 * If we've been told to shut down, we exit as soon as there are no
B
Bruce Momjian 已提交
2876
	 * remaining children.	If there was a crash, cleanup will occur at the
2877 2878 2879 2880 2881
	 * next startup.  (Before PostgreSQL 8.3, we tried to recover from the
	 * crash before exiting, but that seems unwise if we are quitting because
	 * we got SIGTERM from init --- there may well not be time for recovery
	 * before init decides to SIGKILL us.)
	 *
2882 2883 2884
	 * Note that the syslogger continues to run.  It will exit when it sees
	 * EOF on its input pipe, which happens when there are no more upstream
	 * processes.
2885 2886 2887 2888 2889 2890 2891 2892 2893 2894
	 */
	if (Shutdown > NoShutdown && pmState == PM_NO_CHILDREN)
	{
		if (FatalError)
		{
			ereport(LOG, (errmsg("abnormal database system shutdown")));
			ExitPostmaster(1);
		}
		else
		{
2895
			/*
2896 2897
			 * Terminate backup mode to avoid recovery after a clean fast
			 * shutdown.
2898 2899 2900
			 */
			CancelBackup();

2901 2902 2903 2904 2905
			/* Normal exit from the postmaster is here */
			ExitPostmaster(0);
		}
	}

2906
	/*
2907 2908 2909 2910
	 * If recovery failed, wait for all non-syslogger children to exit, and
	 * then exit postmaster. We don't try to reinitialize when recovery fails,
	 * because more than likely it will just fail again and we will keep
	 * trying forever.
2911 2912
	 */
	if (RecoveryError && pmState == PM_NO_CHILDREN)
2913
		ExitPostmaster(1);
2914

2915
	/*
2916 2917
	 * If we need to recover from a crash, wait for all non-syslogger children
	 * to exit, then reset shmem and StartupDataBase.
2918 2919 2920 2921 2922 2923
	 */
	if (FatalError && pmState == PM_NO_CHILDREN)
	{
		ereport(LOG,
				(errmsg("all server processes terminated; reinitializing")));

2924
		shmem_exit(1);
2925 2926 2927 2928 2929 2930 2931 2932 2933
		reset_shared(PostPortNumber);

		StartupPID = StartupDataBase();
		Assert(StartupPID != 0);
		pmState = PM_STARTUP;
	}
}


2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944
/*
 * signal_child -- deliver a signal to a postmaster child process
 *
 * Where setsid() is available, every child makes itself a process group
 * leader.  For signals that are generally interpreted in the appropriate
 * fashion we therefore signal the whole process group, not only the direct
 * child.  That lets us, for instance, SIGQUIT a blocked archive_recovery
 * script or SIGINT a script a backend is running via system().
 *
 * A freshly forked child may not have reached setsid() yet, so the child is
 * always signaled directly in addition to the group.  We assume such a child
 * handles the signal before spawning any grandchildren, and that receiving
 * the same signal twice is harmless.
 *
 * A failing kill() is only reported at DEBUG3.
 */
static void
signal_child(pid_t pid, int signal)
{
#ifdef HAVE_SETSID
	pid_t		group = -pid;	/* negative pid addresses the process group */
#endif

	/* Always hit the child itself first */
	if (kill(pid, signal) < 0)
		elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) pid, signal);

#ifdef HAVE_SETSID
	/* Only these signals are propagated to the child's process group */
	if (signal == SIGINT ||
		signal == SIGTERM ||
		signal == SIGQUIT ||
		signal == SIGSTOP)
	{
		if (kill(group, signal) < 0)
			elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) group, signal);
	}
#endif
}

M
 
Marc G. Fournier 已提交
2971
/*
2972 2973 2974
 * Send a signal to all backend children, including autovacuum workers
 * (but NOT special children; dead_end children are never signaled, either).
 * If only_autovac is TRUE, only the autovacuum worker processes are signalled.
2975 2976 2977
 */
static void
SignalSomeChildren(int signal, bool only_autovac)
M
 
Marc G. Fournier 已提交
2978
{
2979
	Dlelem	   *curr;
M
 
Marc G. Fournier 已提交
2980

2981
	for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
M
 
Marc G. Fournier 已提交
2982
	{
2983
		Backend    *bp = (Backend *) DLE_VAL(curr);
M
 
Marc G. Fournier 已提交
2984

2985 2986
		if (bp->dead_end)
			continue;
2987 2988 2989
		if (only_autovac && !bp->is_autovacuum)
			continue;

2990 2991 2992
		ereport(DEBUG4,
				(errmsg_internal("sending signal %d to process %d",
								 signal, (int) bp->pid)));
2993
		signal_child(bp->pid, signal);
M
 
Marc G. Fournier 已提交
2994 2995 2996
	}
}

2997 2998 2999
/*
 * BackendStartup -- start backend process
 *
3000
 * returns: STATUS_ERROR if the fork failed, STATUS_OK otherwise.
3001 3002
 *
 * Note: if you change this code, also consider StartAutovacuumWorker.
3003
 */
3004
static int
3005
BackendStartup(Port *port)
3006
{
3007
	Backend    *bn;				/* for backend cleanup */
3008
	pid_t		pid;
B
Bruce Momjian 已提交
3009

3010
	/*
3011 3012
	 * Create backend data structure.  Better before the fork() so we can
	 * handle failure cleanly.
3013 3014 3015 3016
	 */
	bn = (Backend *) malloc(sizeof(Backend));
	if (!bn)
	{
3017 3018 3019
		ereport(LOG,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of memory")));
3020 3021 3022
		return STATUS_ERROR;
	}

3023 3024 3025 3026 3027 3028 3029 3030
	/*
	 * Compute the cancel key that will be assigned to this backend. The
	 * backend will have its own copy in the forked-off process' value of
	 * MyCancelKey, so that it can transmit the key to the frontend.
	 */
	MyCancelKey = PostmasterRandom();
	bn->cancel_key = MyCancelKey;

3031
	/* Pass down canAcceptConnections state */
3032
	port->canAcceptConnections = canAcceptConnections();
3033 3034 3035 3036 3037 3038 3039 3040 3041 3042
	bn->dead_end = (port->canAcceptConnections != CAC_OK &&
					port->canAcceptConnections != CAC_WAITBACKUP);

	/*
	 * Unless it's a dead_end child, assign it a child slot number
	 */
	if (!bn->dead_end)
		bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot();
	else
		bn->child_slot = 0;
3043 3044 3045

#ifdef EXEC_BACKEND
	pid = backend_forkexec(port);
B
Bruce Momjian 已提交
3046
#else							/* !EXEC_BACKEND */
3047
	pid = fork_process();
3048
	if (pid == 0)				/* child */
3049 3050
	{
		free(bn);
3051 3052 3053 3054 3055 3056

		/*
		 * Let's clean up ourselves as the postmaster child, and close the
		 * postmaster's listen sockets.  (In EXEC_BACKEND case this is all
		 * done in SubPostmasterMain.)
		 */
B
Bruce Momjian 已提交
3057
		IsUnderPostmaster = true;		/* we are a postmaster subprocess now */
3058

B
Bruce Momjian 已提交
3059
		MyProcPid = getpid();	/* reset MyProcPid */
3060

3061 3062
		MyStartTime = time(NULL);

3063 3064 3065 3066 3067 3068
		/* We don't want the postmaster's proc_exit() handlers */
		on_exit_reset();

		/* Close the postmaster's sockets */
		ClosePostmasterPorts(false);

3069
		/* Perform additional initialization and collect startup packet */
3070 3071 3072
		BackendInitialize(port);

		/* And run the backend */
3073
		proc_exit(BackendRun(port));
3074
	}
B
Bruce Momjian 已提交
3075
#endif   /* EXEC_BACKEND */
3076

3077 3078
	if (pid < 0)
	{
3079
		/* in parent, fork failed */
3080 3081
		int			save_errno = errno;

3082 3083
		if (!bn->dead_end)
			(void) ReleasePostmasterChildSlot(bn->child_slot);
3084
		free(bn);
3085 3086
		errno = save_errno;
		ereport(LOG,
B
Bruce Momjian 已提交
3087
				(errmsg("could not fork new process for connection: %m")));
3088
		report_fork_failure_to_client(port, save_errno);
3089
		return STATUS_ERROR;
3090 3091
	}

3092
	/* in parent, successful fork */
3093 3094 3095
	ereport(DEBUG2,
			(errmsg_internal("forked new backend, pid=%d socket=%d",
							 (int) pid, port->sock)));
3096 3097

	/*
B
Bruce Momjian 已提交
3098 3099
	 * Everything's been successful, it's safe to add this backend to our list
	 * of backends.
3100 3101
	 */
	bn->pid = pid;
3102
	bn->is_autovacuum = false;
3103 3104
	DLInitElem(&bn->elem, bn);
	DLAddHead(BackendList, &bn->elem);
3105
#ifdef EXEC_BACKEND
3106 3107
	if (!bn->dead_end)
		ShmemBackendArrayAdd(bn);
3108
#endif
3109

3110
	return STATUS_OK;
3111 3112
}

3113 3114
/*
 * report_fork_failure_to_client -- tell the client that fork() failed
 *
 * Makes one attempt to report the backend fork() failure before the
 * connection is closed.  We do not want to risk blocking the postmaster on
 * this connection, so the socket is switched to non-blocking mode and the
 * message is sent only once (apart from retries after EINTR).
 *
 * This is grungy special-purpose code; backend libpq cannot be used because
 * it is not up and running.
 */
static void
report_fork_failure_to_client(Port *port, int errnum)
{
	char		msg[1000];
	size_t		msglen;
	int			sent;

	/* Build the error message packet (always V2 protocol) */
	snprintf(msg, sizeof(msg), "E%s%s\n",
			 _("could not fork new process for connection: "),
			 strerror(errnum));

	/* The packet includes its terminating NUL byte */
	msglen = strlen(msg) + 1;

	/* Only send if the socket could be made non-blocking */
	if (pg_set_noblock(port->sock))
	{
		/* Retry after EINTR; every other failure is ignored */
		for (;;)
		{
			sent = send(port->sock, msg, msglen, 0);
			if (sent >= 0 || errno != EINTR)
				break;
		}
	}
}


3144
/*
3145
 * BackendInitialize -- initialize an interactive (postmaster-child)
3146
 *				backend process, and collect the client's startup packet.
3147
 *
3148 3149 3150 3151 3152
 * returns: nothing.  Will not return at all if there's any failure.
 *
 * Note: this code does not depend on having any access to shared memory.
 * In the EXEC_BACKEND case, we are physically attached to shared memory
 * but have not yet set up most of our local pointers to shmem structures.
3153
 */
3154 3155
static void
BackendInitialize(Port *port)
3156
{
B
Bruce Momjian 已提交
3157
	int			status;
B
Bruce Momjian 已提交
3158 3159
	char		remote_host[NI_MAXHOST];
	char		remote_port[NI_MAXSERV];
3160
	char		remote_ps_data[NI_MAXHOST];
3161

3162 3163 3164 3165 3166
	/* Save port etc. for ps status */
	MyProcPort = port;

	/*
	 * PreAuthDelay is a debugging aid for investigating problems in the
B
Bruce Momjian 已提交
3167
	 * authentication cycle: it can be set in postgresql.conf to allow time to
3168 3169
	 * attach to the newly-forked backend with a debugger.  (See also
	 * PostAuthDelay, which we allow clients to pass through PGOPTIONS, but
B
Bruce Momjian 已提交
3170
	 * it is not honored until after authentication.)
3171 3172 3173 3174
	 */
	if (PreAuthDelay > 0)
		pg_usleep(PreAuthDelay * 1000000L);

3175
	/* This flag will remain set until InitPostgres finishes authentication */
3176 3177
	ClientAuthInProgress = true;	/* limit visibility of log messages */

3178 3179
	/* save process start time */
	port->SessionStartTime = GetCurrentTimestamp();
3180
	MyStartTime = timestamptz_to_time_t(port->SessionStartTime);
3181 3182 3183 3184 3185

	/* set these to empty in case they are needed before we set them up */
	port->remote_host = "";
	port->remote_port = "";

3186
	/*
B
Bruce Momjian 已提交
3187 3188
	 * Initialize libpq and enable reporting of ereport errors to the client.
	 * Must do this now because authentication uses libpq to send messages.
3189 3190
	 */
	pq_init();					/* initialize libpq to talk to client */
3191
	whereToSendOutput = DestRemote;		/* now safe to ereport to client */
3192

3193
	/*
3194
	 * If possible, make this process a group leader, so that the postmaster
B
Bruce Momjian 已提交
3195
	 * can signal any child processes too.	(We do this now on the off chance
3196 3197 3198 3199 3200 3201 3202 3203
	 * that something might spawn a child process during authentication.)
	 */
#ifdef HAVE_SETSID
	if (setsid() < 0)
		elog(FATAL, "setsid() failed: %m");
#endif

	/*
3204 3205
	 * We arrange for a simple exit(1) if we receive SIGTERM or SIGQUIT
	 * or timeout while trying to collect the startup packet.  Otherwise the
B
Bruce Momjian 已提交
3206
	 * postmaster cannot shutdown the database FAST or IMMED cleanly if a
3207
	 * buggy client fails to send the packet promptly.
3208
	 */
3209 3210 3211 3212
	pqsignal(SIGTERM, startup_die);
	pqsignal(SIGQUIT, startup_die);
	pqsignal(SIGALRM, startup_die);
	PG_SETMASK(&StartupBlockSig);
3213

3214
	/*
3215
	 * Get the remote host name and port for logging and status display.
3216
	 */
B
Bruce Momjian 已提交
3217 3218
	remote_host[0] = '\0';
	remote_port[0] = '\0';
3219 3220 3221
	if (pg_getnameinfo_all(&port->raddr.addr, port->raddr.salen,
						   remote_host, sizeof(remote_host),
						   remote_port, sizeof(remote_port),
3222
					   (log_hostname ? 0 : NI_NUMERICHOST) | NI_NUMERICSERV))
3223
	{
3224 3225 3226 3227
		int			ret = pg_getnameinfo_all(&port->raddr.addr, port->raddr.salen,
											 remote_host, sizeof(remote_host),
											 remote_port, sizeof(remote_port),
											 NI_NUMERICHOST | NI_NUMERICSERV);
B
Bruce Momjian 已提交
3228

3229 3230
		if (ret)
			ereport(WARNING,
3231 3232
					(errmsg_internal("pg_getnameinfo_all() failed: %s",
									 gai_strerror(ret))));
3233
	}
3234 3235 3236
	snprintf(remote_ps_data, sizeof(remote_ps_data),
			 remote_port[0] == '\0' ? "%s" : "%s(%s)",
			 remote_host, remote_port);
B
Bruce Momjian 已提交
3237 3238

	if (Log_connections)
3239
		ereport(LOG,
3240 3241 3242
				(errmsg("connection received: host=%s%s%s",
						remote_host, remote_port[0] ? " port=" : "",
						remote_port)));
3243

3244
	/*
3245
	 * save remote_host and remote_port in port structure
3246 3247 3248 3249
	 */
	port->remote_host = strdup(remote_host);
	port->remote_port = strdup(remote_port);

3250
	/*
3251
	 * Ready to begin client interaction.  We will give up and exit(1) after a
B
Bruce Momjian 已提交
3252
	 * time delay, so that a broken client can't hog a connection
3253 3254
	 * indefinitely.  PreAuthDelay and any DNS interactions above don't count
	 * against the time limit.
3255
	 */
3256
	if (!enable_sig_alarm(AuthenticationTimeout * 1000, false))
3257
		elog(FATAL, "could not set timer for startup packet timeout");
3258 3259

	/*
B
Bruce Momjian 已提交
3260 3261
	 * Receive the startup packet (which might turn out to be a cancel request
	 * packet).
3262 3263 3264
	 */
	status = ProcessStartupPacket(port, false);

3265 3266 3267 3268
	/*
	 * Stop here if it was bad or a cancel packet.  ProcessStartupPacket
	 * already did any appropriate error reporting.
	 */
3269
	if (status != STATUS_OK)
3270
		proc_exit(0);
3271 3272 3273

	/*
	 * Now that we have the user and database name, we can set the process
B
Bruce Momjian 已提交
3274
	 * title for ps.  It's good to do this as early as possible in startup.
3275
	 */
3276
	init_ps_display(port->user_name, port->database_name, remote_ps_data,
B
Bruce Momjian 已提交
3277 3278
					update_process_title ? "authentication" : "");

3279
	/*
3280
	 * Disable the timeout, and prevent SIGTERM/SIGQUIT again.
3281
	 */
3282
	if (!disable_sig_alarm(false))
3283
		elog(FATAL, "could not disable timer for startup packet timeout");
3284
	PG_SETMASK(&BlockSig);
3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300
}


/*
 * BackendRun -- assemble the backend's argument vector and call
 *				PostgresMain()
 *
 * returns:
 *		Shouldn't return at all.
 *		If PostgresMain() fails, its status is returned.
 */
static int
BackendRun(Port *port)
{
	char	  **args;
	int			capacity;
	int			nargs;
	long		start_secs;
	int			start_usecs;
	int			k;

	/*
	 * The backend must not be able to see the postmaster's random number
	 * generator state.  Clobber the static random_seed *and* begin a new
	 * random sequence in the random() library function.
	 */
	random_seed = 0;
	random_start_time.tv_usec = 0;
	/* slightly hacky way to get integer microseconds part of timestamptz */
	TimestampDifference(0, port->SessionStartTime, &start_secs, &start_usecs);
	srandom((unsigned int) (MyProcPid ^ start_usecs));

	/*
	 * Build the argv vector handed to PostgresMain.
	 *
	 * ExtraOptions can contribute at most (strlen(ExtraOptions) + 1) / 2
	 * commandline arguments; see pg_split_opts().  The constant 5 allows for
	 * the fixed arguments supplied below.
	 */
	capacity = 5 + (strlen(ExtraOptions) + 1) / 2;

	args = (char **) MemoryContextAlloc(TopMemoryContext,
										capacity * sizeof(char *));
	nargs = 0;

	args[nargs] = "postgres";
	nargs++;

	/*
	 * Add any backend switches given with -o on the postmaster's own command
	 * line.  We assume these are secure.  (Mangling ExtraOptions is fine at
	 * this point, since we're safely inside a subprocess.)
	 */
	pg_split_opts(args, &nargs, ExtraOptions);

	/* Name the database the backend is to use */
	args[nargs] = port->database_name;
	nargs++;

	args[nargs] = NULL;

	Assert(nargs < capacity);

	/* Debug: list the arguments being passed to the backend */
	ereport(DEBUG3,
			(errmsg_internal("%s child[%d]: starting with (",
							 progname, (int) getpid())));
	k = 0;
	while (k < nargs)
	{
		ereport(DEBUG3,
				(errmsg_internal("\t%s", args[k])));
		++k;
	}
	ereport(DEBUG3,
			(errmsg_internal(")")));

	/*
	 * Leave PostmasterContext.  (It cannot be deleted just yet, because
	 * InitPostgres will need the HBA data.)
	 */
	MemoryContextSwitchTo(TopMemoryContext);

	return (PostgresMain(nargs, args, port->user_name));
}

3369 3370 3371 3372

#ifdef EXEC_BACKEND

/*
3373
 * postmaster_forkexec -- fork and exec a postmaster subprocess
3374
 *
3375 3376 3377 3378 3379 3380 3381 3382
 * The caller must have set up the argv array already, except for argv[2]
 * which will be filled with the name of the temp variable file.
 *
 * Returns the child process PID, or -1 on fork failure (a suitable error
 * message has been logged on failure).
 *
 * All uses of this routine will dispatch to SubPostmasterMain in the
 * child process.
3383
 */
3384 3385
pid_t
postmaster_forkexec(int argc, char *argv[])
3386
{
B
Bruce Momjian 已提交
3387
	Port		port;
3388

3389 3390 3391 3392
	/* This entry point passes dummy values for the Port variables */
	memset(&port, 0, sizeof(port));
	return internal_forkexec(argc, argv, &port);
}
3393

3394 3395 3396
/*
 * backend_forkexec -- fork/exec off a backend process
 *
 * Some operating systems (WIN32) lack fork(), so it is simulated: the
 * parameters the child needs are stored away and a new child process is
 * then created.
 *
 * returns the pid of the fork/exec'd process, or -1 on failure
 */
static pid_t
backend_forkexec(Port *port)
{
	char	   *child_argv[4];
	int			child_argc;

	child_argv[0] = "postgres";
	child_argv[1] = "--forkbackend";
	child_argv[2] = NULL;		/* filled in by internal_forkexec */
	child_argc = 3;

	/* terminate the vector */
	child_argv[child_argc] = NULL;
	Assert(child_argc < lengthof(child_argv));

	return internal_forkexec(child_argc, child_argv, port);
}
3418

3419 3420 3421 3422 3423 3424 3425 3426
#ifndef WIN32

/*
 * internal_forkexec non-win32 implementation
 *
 * - writes out backend variables to the parameter file
 * - fork():s, and then exec():s the child process
 */
3427 3428 3429
static pid_t
internal_forkexec(int argc, char *argv[], Port *port)
{
3430
	static unsigned long tmpBackendFileNum = 0;
3431 3432
	pid_t		pid;
	char		tmpfilename[MAXPGPATH];
3433
	BackendParameters param;
B
Bruce Momjian 已提交
3434
	FILE	   *fp;
3435 3436 3437 3438 3439

	if (!save_backend_variables(&param, port))
		return -1;				/* log made by save_backend_variables */

	/* Calculate name for temp file */
3440 3441
	snprintf(tmpfilename, MAXPGPATH, "%s/%s.backend_var.%d.%lu",
			 PG_TEMP_FILES_DIR, PG_TEMP_FILE_PREFIX,
3442
			 MyProcPid, ++tmpBackendFileNum);
3443

3444 3445 3446 3447
	/* Open file */
	fp = AllocateFile(tmpfilename, PG_BINARY_W);
	if (!fp)
	{
3448 3449
		/* As in OpenTemporaryFile, try to make the temp-file directory */
		mkdir(PG_TEMP_FILES_DIR, S_IRWXU);
3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478

		fp = AllocateFile(tmpfilename, PG_BINARY_W);
		if (!fp)
		{
			ereport(LOG,
					(errcode_for_file_access(),
					 errmsg("could not create file \"%s\": %m",
							tmpfilename)));
			return -1;
		}
	}

	if (fwrite(&param, sizeof(param), 1, fp) != 1)
	{
		ereport(LOG,
				(errcode_for_file_access(),
				 errmsg("could not write to file \"%s\": %m", tmpfilename)));
		FreeFile(fp);
		return -1;
	}

	/* Release file */
	if (FreeFile(fp))
	{
		ereport(LOG,
				(errcode_for_file_access(),
				 errmsg("could not write to file \"%s\": %m", tmpfilename)));
		return -1;
	}
3479

3480 3481 3482
	/* Make sure caller set up argv properly */
	Assert(argc >= 3);
	Assert(argv[argc] == NULL);
3483
	Assert(strncmp(argv[1], "--fork", 6) == 0);
3484
	Assert(argv[2] == NULL);
3485

3486
	/* Insert temp file name after --fork argument */
3487
	argv[2] = tmpfilename;
3488

3489
	/* Fire off execv in child */
3490
	if ((pid = fork_process()) == 0)
3491 3492 3493 3494
	{
		if (execv(postgres_exec_path, argv) < 0)
		{
			ereport(LOG,
P
Peter Eisentraut 已提交
3495
					(errmsg("could not execute server process \"%s\": %m",
3496 3497 3498 3499 3500
							postgres_exec_path)));
			/* We're already in the child process here, can't return */
			exit(1);
		}
	}
3501

B
Bruce Momjian 已提交
3502
	return pid;					/* Parent returns pid, or -1 on fork failure */
3503
}
B
Bruce Momjian 已提交
3504
#else							/* WIN32 */
3505 3506 3507 3508 3509 3510

/*
 * internal_forkexec -- win32 implementation
 *
 * - creates an inheritable file mapping used to pass the backend parameters
 * - starts the backend using CreateProcess(), in suspended state
 * - writes out the backend variables into the mapping
 *	- during this, duplicates handles and sockets required for
 *	  inheritance into the new process
 * - resumes execution of the new process once the backend parameter
 *	 block is complete
 * - registers a wait so that the child's death gets noticed
 *
 * argv[2] is set to the decimal value of the mapping handle.
 *
 * Returns the new process ID, or -1 on failure (after logging the reason).
 * On every failure path the parameter mapping is released, and a child that
 * was already created is terminated.
 */
static pid_t
internal_forkexec(int argc, char *argv[], Port *port)
{
	STARTUPINFO si;
	PROCESS_INFORMATION pi;
	int			i;
	int			j;
	char		cmdLine[MAXPGPATH * 2];
	HANDLE		paramHandle;
	BackendParameters *param;
	SECURITY_ATTRIBUTES sa;
	char		paramHandleStr[32];
	win32_deadchild_waitinfo *childinfo;

	/* Make sure caller set up argv properly */
	Assert(argc >= 3);
	Assert(argv[argc] == NULL);
	Assert(strncmp(argv[1], "--fork", 6) == 0);
	Assert(argv[2] == NULL);

	/* Set up shared memory for parameter passing */
	ZeroMemory(&sa, sizeof(sa));
	sa.nLength = sizeof(sa);
	sa.bInheritHandle = TRUE;
	paramHandle = CreateFileMapping(INVALID_HANDLE_VALUE,
									&sa,
									PAGE_READWRITE,
									0,
									sizeof(BackendParameters),
									NULL);

	/*
	 * CreateFileMapping() reports failure by returning NULL; the
	 * INVALID_HANDLE_VALUE test is kept as well for safety.
	 */
	if (paramHandle == NULL || paramHandle == INVALID_HANDLE_VALUE)
	{
		elog(LOG, "could not create backend parameter file mapping: error code %d",
			 (int) GetLastError());
		return -1;
	}

	param = MapViewOfFile(paramHandle, FILE_MAP_WRITE, 0, 0, sizeof(BackendParameters));
	if (!param)
	{
		elog(LOG, "could not map backend parameter memory: error code %d",
			 (int) GetLastError());
		CloseHandle(paramHandle);
		return -1;
	}

	/* Insert the parameter mapping handle after the --fork argument */
	sprintf(paramHandleStr, "%lu", (DWORD) paramHandle);
	argv[2] = paramHandleStr;

	/*
	 * Format the cmd line.  The last two bytes are pre-zeroed and snprintf
	 * is never allowed to touch the final one, so a nonzero next-to-last
	 * byte afterwards means the command line was truncated.
	 */
	cmdLine[sizeof(cmdLine) - 1] = '\0';
	cmdLine[sizeof(cmdLine) - 2] = '\0';
	snprintf(cmdLine, sizeof(cmdLine) - 1, "\"%s\"", postgres_exec_path);
	i = 0;
	while (argv[++i] != NULL)
	{
		j = strlen(cmdLine);
		snprintf(cmdLine + j, sizeof(cmdLine) - 1 - j, " \"%s\"", argv[i]);
	}
	if (cmdLine[sizeof(cmdLine) - 2] != '\0')
	{
		elog(LOG, "subprocess command line too long");
		/* don't leak the parameter mapping */
		UnmapViewOfFile(param);
		CloseHandle(paramHandle);
		return -1;
	}

	memset(&pi, 0, sizeof(pi));
	memset(&si, 0, sizeof(si));
	si.cb = sizeof(si);

	/*
	 * Create the subprocess in a suspended state. This will be resumed later,
	 * once we have written out the parameter file.
	 */
	if (!CreateProcess(NULL, cmdLine, NULL, NULL, TRUE, CREATE_SUSPENDED,
					   NULL, NULL, &si, &pi))
	{
		/* report first, so the cleanup cannot disturb the error code */
		elog(LOG, "CreateProcess call failed: %m (error code %d)",
			 (int) GetLastError());
		UnmapViewOfFile(param);
		CloseHandle(paramHandle);
		return -1;
	}

	if (!save_backend_variables(param, port, pi.hProcess, pi.dwProcessId))
	{
		/*
		 * log made by save_backend_variables, but we have to clean up the
		 * mess with the half-started process
		 */
		if (!TerminateProcess(pi.hProcess, 255))
			ereport(LOG,
					(errmsg_internal("could not terminate unstarted process: error code %d",
									 (int) GetLastError())));
		CloseHandle(pi.hProcess);
		CloseHandle(pi.hThread);
		/* the parameter mapping is of no further use either */
		UnmapViewOfFile(param);
		CloseHandle(paramHandle);
		return -1;				/* log made by save_backend_variables */
	}

	/* Drop the parameter shared memory that is now inherited to the backend */
	if (!UnmapViewOfFile(param))
		elog(LOG, "could not unmap view of backend parameter file: error code %d",
			 (int) GetLastError());
	if (!CloseHandle(paramHandle))
		elog(LOG, "could not close handle to backend parameter file: error code %d",
			 (int) GetLastError());

	/*
	 * Reserve the memory region used by our main shared memory segment before we
	 * resume the child process.
	 */
	if (!pgwin32_ReserveSharedMemoryRegion(pi.hProcess))
	{
		/*
		 * Failed to reserve the memory, so terminate the newly created
		 * process and give up.
		 */
		if (!TerminateProcess(pi.hProcess, 255))
			ereport(LOG,
					(errmsg_internal("could not terminate process that failed to reserve memory: error code %d",
									 (int) GetLastError())));
		CloseHandle(pi.hProcess);
		CloseHandle(pi.hThread);
		return -1;			/* logging done by pgwin32_ReserveSharedMemoryRegion() */
	}

	/*
	 * Now that the backend variables are written out, we start the child
	 * thread so it can start initializing while we set up the rest of the
	 * parent state.
	 */
	if (ResumeThread(pi.hThread) == -1)
	{
		/* grab the error code before any other API call can overwrite it */
		DWORD		resume_err = GetLastError();

		if (!TerminateProcess(pi.hProcess, 255))
		{
			ereport(LOG,
					(errmsg_internal("could not terminate unstartable process: error code %d",
									 (int) GetLastError())));
			CloseHandle(pi.hProcess);
			CloseHandle(pi.hThread);
			return -1;
		}
		CloseHandle(pi.hProcess);
		CloseHandle(pi.hThread);
		ereport(LOG,
				(errmsg_internal("could not resume thread of unstarted process: error code %d",
								 (int) resume_err)));
		return -1;
	}

	/*
	 * Queue a waiter to signal when this child dies. The wait will be
	 * handled automatically by an operating system thread pool.
	 *
	 * Note: use malloc instead of palloc, since it needs to be thread-safe.
	 * Struct will be free():d from the callback function that runs on a
	 * different thread.
	 */
	childinfo = malloc(sizeof(win32_deadchild_waitinfo));
	if (!childinfo)
		ereport(FATAL,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of memory")));

	childinfo->procHandle = pi.hProcess;
	childinfo->procId = pi.dwProcessId;

	if (!RegisterWaitForSingleObject(&childinfo->waitHandle,
									 pi.hProcess,
									 pgwin32_deadchild_callback,
									 childinfo,
									 INFINITE,
								WT_EXECUTEONLYONCE | WT_EXECUTEINWAITTHREAD))
		ereport(FATAL,
		(errmsg_internal("could not register process for wait: error code %d",
						 (int) GetLastError())));

	/* Don't close pi.hProcess here - the wait thread needs access to it */

	CloseHandle(pi.hThread);

	return pi.dwProcessId;
}
B
Bruce Momjian 已提交
3697
#endif   /* WIN32 */
3698 3699


3700
/*
3701 3702 3703
 * SubPostmasterMain -- Get the fork/exec'd process into a state equivalent
 *			to what it would be if we'd simply forked on Unix, and then
 *			dispatch to the appropriate place.
3704
 *
3705
 * The first two command line arguments are expected to be "--forkFOO"
3706 3707 3708 3709
 * (where FOO indicates which postmaster child we are to become), and
 * the name of a variables file that we can read to load data that would
 * have been inherited by fork() on Unix.  Remaining arguments go to the
 * subprocess FooMain() routine.
3710
 */
3711 3712
int
SubPostmasterMain(int argc, char *argv[])
3713
{
3714
	Port		port;
3715

3716 3717
	/* Do this sooner rather than later... */
	IsUnderPostmaster = true;	/* we are a postmaster subprocess now */
3718

3719
	MyProcPid = getpid();		/* reset MyProcPid */
3720

3721 3722
	MyStartTime = time(NULL);

B
Bruce Momjian 已提交
3723 3724 3725 3726 3727
	/*
	 * make sure stderr is in binary mode before anything can possibly be
	 * written to it, in case it's actually the syslogger pipe, so the pipe
	 * chunking protocol isn't disturbed. Non-logpipe data gets translated on
	 * redirection (e.g. via pg_ctl -l) anyway.
3728 3729
	 */
#ifdef WIN32
B
Bruce Momjian 已提交
3730
	_setmode(fileno(stderr), _O_BINARY);
3731 3732
#endif

3733 3734 3735
	/* Lose the postmaster's on-exit routines (really a no-op) */
	on_exit_reset();

3736 3737
	/* In EXEC_BACKEND case we will not have inherited these settings */
	IsPostmasterEnvironment = true;
3738
	whereToSendOutput = DestNone;
3739 3740 3741 3742 3743 3744

	/* Setup essential subsystems (to ensure elog() behaves sanely) */
	MemoryContextInit();
	InitializeGUCOptions();

	/* Read in the variables file */
3745 3746 3747
	memset(&port, 0, sizeof(Port));
	read_backend_variables(argv[2], &port);

B
Bruce Momjian 已提交
3748 3749 3750
	/*
	 * Set up memory area for GSS information. Mirrors the code in ConnCreate
	 * for the non-exec case.
3751 3752
	 */
#if defined(ENABLE_GSS) || defined(ENABLE_SSPI)
B
Bruce Momjian 已提交
3753
	port.gss = (pg_gssinfo *) calloc(1, sizeof(pg_gssinfo));
3754 3755 3756 3757 3758 3759
	if (!port.gss)
		ereport(FATAL,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of memory")));
#endif

3760 3761 3762 3763 3764
	/* Check we got appropriate args */
	if (argc < 3)
		elog(FATAL, "invalid subpostmaster invocation");

	/*
B
Bruce Momjian 已提交
3765 3766 3767
	 * If appropriate, physically re-attach to shared memory segment. We want
	 * to do this before going any further to ensure that we can attach at the
	 * same address the postmaster used.
3768
	 */
3769
	if (strcmp(argv[1], "--forkbackend") == 0 ||
3770 3771
		strcmp(argv[1], "--forkavlauncher") == 0 ||
		strcmp(argv[1], "--forkavworker") == 0 ||
3772
		strcmp(argv[1], "--forkboot") == 0)
3773 3774
		PGSharedMemoryReAttach();

3775
	/* autovacuum needs this set before calling InitProcess */
3776 3777 3778 3779
	if (strcmp(argv[1], "--forkavlauncher") == 0)
		AutovacuumLauncherIAm();
	if (strcmp(argv[1], "--forkavworker") == 0)
		AutovacuumWorkerIAm();
3780

3781
	/*
B
Bruce Momjian 已提交
3782 3783 3784
	 * Start our win32 signal implementation. This has to be done after we
	 * read the backend variables, because we need to pick up the signal pipe
	 * from the parent process.
3785 3786 3787 3788 3789
	 */
#ifdef WIN32
	pgwin32_signal_initialize();
#endif

3790 3791 3792
	/* In EXEC_BACKEND case we will not have inherited these settings */
	pqinitmask();
	PG_SETMASK(&BlockSig);
3793

3794
	/* Read in remaining GUC variables */
3795
	read_nondefault_variables();
3796

3797
	/*
3798 3799 3800 3801
	 * Reload any libraries that were preloaded by the postmaster.	Since we
	 * exec'd this process, those libraries didn't come along with us; but we
	 * should load them into all child processes to be consistent with the
	 * non-EXEC_BACKEND behavior.
3802 3803 3804
	 */
	process_shared_preload_libraries();

3805
	/* Run backend or appropriate child */
3806
	if (strcmp(argv[1], "--forkbackend") == 0)
3807
	{
3808
		Assert(argc == 3);		/* shouldn't be any more args */
B
Bruce Momjian 已提交
3809

3810 3811
		/* Close the postmaster's sockets */
		ClosePostmasterPorts(false);
B
Bruce Momjian 已提交
3812

3813
		/*
B
Bruce Momjian 已提交
3814 3815 3816
		 * Need to reinitialize the SSL library in the backend, since the
		 * context structures contain function pointers and cannot be passed
		 * through the parameter file.
3817 3818 3819
		 *
		 * XXX should we do this in all child processes?  For the moment it's
		 * enough to do it in backend children.
3820
		 */
3821
#ifdef USE_SSL
3822 3823 3824 3825
		if (EnableSSL)
			secure_initialize();
#endif

3826
		/*
3827
		 * Perform additional initialization and collect startup packet.
3828
		 *
B
Bruce Momjian 已提交
3829 3830 3831 3832 3833
		 * We want to do this before InitProcess() for a couple of reasons: 1.
		 * so that we aren't eating up a PGPROC slot while waiting on the
		 * client. 2. so that if InitProcess() fails due to being out of
		 * PGPROC slots, we have already initialized libpq and are able to
		 * report the error to the client.
3834 3835 3836 3837 3838 3839 3840 3841 3842
		 */
		BackendInitialize(&port);

		/* Restore basic shared memory pointers */
		InitShmemAccess(UsedShmemSegAddr);

		/* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
		InitProcess();

3843
		/*
B
Bruce Momjian 已提交
3844 3845
		 * Attach process to shared data structures.  If testing EXEC_BACKEND
		 * on Linux, you must run this as root before starting the postmaster:
3846
		 *
B
Bruce Momjian 已提交
3847
		 * echo 0 >/proc/sys/kernel/randomize_va_space
3848
		 *
B
Bruce Momjian 已提交
3849 3850 3851 3852
		 * This prevents a randomized stack base address that causes child
		 * shared memory to be at a different address than the parent, making
		 * it impossible to attached to shared memory.	Return the value to
		 * '1' when finished.
3853
		 */
3854 3855 3856
		CreateSharedMemoryAndSemaphores(false, 0);

		/* And run the backend */
3857 3858
		proc_exit(BackendRun(&port));
	}
3859
	if (strcmp(argv[1], "--forkboot") == 0)
3860 3861
	{
		/* Close the postmaster's sockets */
3862
		ClosePostmasterPorts(false);
3863

3864 3865 3866 3867
		/* Restore basic shared memory pointers */
		InitShmemAccess(UsedShmemSegAddr);

		/* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
3868
		InitAuxiliaryProcess();
3869

3870
		/* Attach process to shared data structures */
3871
		CreateSharedMemoryAndSemaphores(false, 0);
3872

3873
		AuxiliaryProcessMain(argc - 2, argv + 2);
3874
		proc_exit(0);
3875
	}
3876 3877 3878 3879 3880 3881 3882 3883 3884
	if (strcmp(argv[1], "--forkavlauncher") == 0)
	{
		/* Close the postmaster's sockets */
		ClosePostmasterPorts(false);

		/* Restore basic shared memory pointers */
		InitShmemAccess(UsedShmemSegAddr);

		/* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
3885
		InitProcess();
3886 3887 3888 3889 3890 3891 3892 3893

		/* Attach process to shared data structures */
		CreateSharedMemoryAndSemaphores(false, 0);

		AutoVacLauncherMain(argc - 2, argv + 2);
		proc_exit(0);
	}
	if (strcmp(argv[1], "--forkavworker") == 0)
3894 3895 3896 3897
	{
		/* Close the postmaster's sockets */
		ClosePostmasterPorts(false);

3898 3899 3900 3901 3902 3903
		/* Restore basic shared memory pointers */
		InitShmemAccess(UsedShmemSegAddr);

		/* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
		InitProcess();

3904
		/* Attach process to shared data structures */
3905 3906
		CreateSharedMemoryAndSemaphores(false, 0);

3907
		AutoVacWorkerMain(argc - 2, argv + 2);
3908 3909
		proc_exit(0);
	}
3910
	if (strcmp(argv[1], "--forkarch") == 0)
3911 3912
	{
		/* Close the postmaster's sockets */
3913
		ClosePostmasterPorts(false);
3914 3915 3916 3917 3918 3919

		/* Do not want to attach to shared memory */

		PgArchiverMain(argc, argv);
		proc_exit(0);
	}
3920
	if (strcmp(argv[1], "--forkcol") == 0)
3921 3922
	{
		/* Close the postmaster's sockets */
3923
		ClosePostmasterPorts(false);
3924 3925 3926 3927

		/* Do not want to attach to shared memory */

		PgstatCollectorMain(argc, argv);
3928
		proc_exit(0);
3929
	}
3930
	if (strcmp(argv[1], "--forklog") == 0)
3931 3932 3933 3934 3935 3936 3937 3938 3939
	{
		/* Close the postmaster's sockets */
		ClosePostmasterPorts(true);

		/* Do not want to attach to shared memory */

		SysLoggerMain(argc, argv);
		proc_exit(0);
	}
3940 3941

	return 1;					/* shouldn't get here */
3942
}
B
Bruce Momjian 已提交
3943
#endif   /* EXEC_BACKEND */
3944 3945


3946 3947
/*
 * ExitPostmaster -- the postmaster's exit path
 *
 * Postmaster code must never call exit() itself; every termination is
 * supposed to be routed through this function.
 */
static void
ExitPostmaster(int status)
{
	/*
	 * Ideally we would clean up shared memory and kill all backends here.
	 *
	 * Historical note: it was once considered unclear whether backends
	 * should be killed when the postmaster dies ("probably not"); the
	 * follow-up remark was that they MUST be -- vadim 05-10-1999.
	 *
	 * As written, all this function does is hand the status to proc_exit().
	 */
	proc_exit(status);
}

3966
/*
3967
 * sigusr1_handler - handle signal conditions from child processes
3968 3969
 */
static void
3970
sigusr1_handler(SIGNAL_ARGS)
3971
{
3972
	int			save_errno = errno;
3973

3974
	PG_SETMASK(&BlockSig);
3975

3976 3977 3978 3979 3980 3981 3982 3983
	/*
	 * RECOVERY_STARTED and RECOVERY_CONSISTENT signals are ignored in
	 * unexpected states. If the startup process quickly starts up, completes
	 * recovery, exits, we might process the death of the startup process
	 * first. We don't want to go back to recovery in that case.
	 */
	if (CheckPostmasterSignal(PMSIGNAL_RECOVERY_STARTED) &&
		pmState == PM_STARTUP)
3984
	{
3985 3986 3987 3988
		/* WAL redo has started. We're out of reinitialization. */
		FatalError = false;

		/*
3989 3990
		 * Crank up the background writer.	It doesn't matter if this fails,
		 * we'll just try again later.
3991 3992 3993 3994 3995
		 */
		Assert(BgWriterPID == 0);
		BgWriterPID = StartBackgroundWriter();

		pmState = PM_RECOVERY;
3996
	}
3997 3998
	if (CheckPostmasterSignal(PMSIGNAL_RECOVERY_CONSISTENT) &&
		pmState == PM_RECOVERY)
3999
	{
4000 4001 4002 4003 4004
		/*
		 * Likewise, start other special children as needed.
		 */
		Assert(PgStatPID == 0);
		PgStatPID = pgstat_start();
T
Tom Lane 已提交
4005

4006 4007 4008
		/* XXX at this point we could accept read-only connections */
		ereport(DEBUG1,
				(errmsg("database system is in consistent recovery mode")));
T
Tom Lane 已提交
4009

4010 4011
		pmState = PM_RECOVERY_CONSISTENT;
	}
4012

4013
	if (CheckPostmasterSignal(PMSIGNAL_WAKEN_ARCHIVER) &&
4014
		PgArchPID != 0)
4015
	{
4016
		/*
B
Bruce Momjian 已提交
4017 4018
		 * Send SIGUSR1 to archiver process, to wake it up and begin archiving
		 * next transaction log file.
4019
		 */
4020
		signal_child(PgArchPID, SIGUSR1);
B
Bruce Momjian 已提交
4021
	}
4022

4023 4024 4025 4026
	if (CheckPostmasterSignal(PMSIGNAL_ROTATE_LOGFILE) &&
		SysLoggerPID != 0)
	{
		/* Tell syslogger to rotate logfile */
4027
		signal_child(SysLoggerPID, SIGUSR1);
4028
	}
4029

4030
	if (CheckPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER))
4031
	{
4032 4033 4034 4035 4036 4037 4038 4039 4040
		/*
		 * Start one iteration of the autovacuum daemon, even if autovacuuming
		 * is nominally not enabled.  This is so we can have an active defense
		 * against transaction ID wraparound.  We set a flag for the main loop
		 * to do it rather than trying to do it here --- this is because the
		 * autovac process itself may send the signal, and we want to handle
		 * that by launching another iteration as soon as the current one
		 * completes.
		 */
4041
		start_autovac_launcher = true;
4042 4043
	}

4044
	if (CheckPostmasterSignal(PMSIGNAL_START_AUTOVAC_WORKER))
4045 4046
	{
		/* The autovacuum launcher wants us to start a worker process. */
4047
		StartAutovacuumWorker();
4048
	}
4049

4050 4051
	PG_SETMASK(&UnBlockSig);

T
Tom Lane 已提交
4052 4053 4054
	errno = save_errno;
}

4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068
/*
 * startup_die -- signal handler used while processing the startup packet
 *
 * Invoked on a timeout or on a shutdown signal from the postmaster.  It
 * simply exits through proc_exit() with status 1.
 *
 * XXX: a possible future improvement would be to try to tell the client why
 * it is being disconnected.  The difficulty is guaranteeing that doing so
 * neither blocks nor interferes with SSL initialization.  In practice a
 * client that is wedged at this point probably could not make use of the
 * message anyway.
 */
static void
startup_die(SIGNAL_ARGS)
{
	proc_exit(1);
}
4069

4070 4071 4072 4073
/*
 * dummy_handler -- signal handler that deliberately does nothing
 *
 * Installed for signals the postmaster itself has no use for but backends
 * do.  Setting such signals to SIG_IGN in the postmaster could make a newly
 * started backend lose a signal that arrives before it has had a chance to
 * reconfigure its own signal processing.  (See notes in tcop/postgres.c.)
 */
static void
dummy_handler(SIGNAL_ARGS)
{
	/* intentionally empty */
}

4084 4085 4086 4087
/*
 * RandomSalt -- fill md5Salt[0..3] with random non-zero bytes
 *
 * One fresh PostmasterRandom() value is drawn for each of the four bytes.
 */
static void
RandomSalt(char *md5Salt)
{
	int			i;

	for (i = 0; i < 4; i++)
	{
		long		value = PostmasterRandom();

		/*
		 * Reduce modulo 255 rather than 256: this gives up one possible
		 * byte value but lets all bits of the random value influence the
		 * result.  Adding one afterwards guarantees no null byte.
		 */
		md5Salt[i] = (value % 255) + 1;
	}
}

/*
 * PostmasterRandom
 */
static long
PostmasterRandom(void)
4112
{
4113 4114 4115 4116
	/*
	 * Select a random seed at the time of first receiving a request.
	 */
	if (random_seed == 0)
4117
	{
4118 4119 4120 4121 4122
		do
		{
			struct timeval random_stop_time;

			gettimeofday(&random_stop_time, NULL);
B
Bruce Momjian 已提交
4123

4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135
			/*
			 * We are not sure how much precision is in tv_usec, so we swap
			 * the high and low 16 bits of 'random_stop_time' and XOR them
			 * with 'random_start_time'. On the off chance that the result is
			 * 0, we loop until it isn't.
			 */
			random_seed = random_start_time.tv_usec ^
				((random_stop_time.tv_usec << 16) |
				 ((random_stop_time.tv_usec >> 16) & 0xffff));
		}
		while (random_seed == 0);

4136
		srandom(random_seed);
4137
	}
4138

4139
	return random();
4140
}
4141 4142

/*
4143 4144
 * Count up number of child processes (excluding special children and
 * dead_end children)
4145 4146 4147 4148 4149 4150 4151 4152
 */
static int
CountChildren(void)
{
	Dlelem	   *curr;
	int			cnt = 0;

	for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
4153 4154 4155 4156 4157 4158
	{
		Backend    *bp = (Backend *) DLE_VAL(curr);

		if (!bp->dead_end)
			cnt++;
	}
4159 4160
	return cnt;
}
4161

4162

T
Tom Lane 已提交
4163
/*
4164
 * StartChildProcess -- start an auxiliary process for the postmaster
4165
 *
B
Bruce Momjian 已提交
4166
 * xlop determines what kind of child will be started.	All child types
4167
 * initially go to AuxiliaryProcessMain, which will handle common setup.
4168
 *
4169 4170
 * Return value of StartChildProcess is subprocess' PID, or 0 if failed
 * to start subprocess.
T
Tom Lane 已提交
4171
 */
4172
static pid_t
4173
StartChildProcess(AuxProcType type)
4174
{
4175 4176 4177
	pid_t		pid;
	char	   *av[10];
	int			ac = 0;
4178
	char		typebuf[32];
B
Bruce Momjian 已提交
4179

4180 4181 4182 4183
	/*
	 * Set up command-line arguments for subprocess
	 */
	av[ac++] = "postgres";
4184

4185
#ifdef EXEC_BACKEND
4186
	av[ac++] = "--forkboot";
4187
	av[ac++] = NULL;			/* filled in by postmaster_forkexec */
4188 4189
#endif

4190 4191
	snprintf(typebuf, sizeof(typebuf), "-x%d", type);
	av[ac++] = typebuf;
4192

4193 4194
	av[ac] = NULL;
	Assert(ac < lengthof(av));
4195

4196 4197
#ifdef EXEC_BACKEND
	pid = postmaster_forkexec(ac, av);
B
Bruce Momjian 已提交
4198
#else							/* !EXEC_BACKEND */
4199
	pid = fork_process();
B
Bruce Momjian 已提交
4200

4201 4202
	if (pid == 0)				/* child */
	{
B
Bruce Momjian 已提交
4203
		IsUnderPostmaster = true;		/* we are a postmaster subprocess now */
4204

4205
		/* Close the postmaster's sockets */
4206
		ClosePostmasterPorts(false);
4207

4208 4209
		/* Lose the postmaster's on-exit routines and port connections */
		on_exit_reset();
4210

4211 4212 4213 4214 4215
		/* Release postmaster's working memory context */
		MemoryContextSwitchTo(TopMemoryContext);
		MemoryContextDelete(PostmasterContext);
		PostmasterContext = NULL;

4216
		AuxiliaryProcessMain(ac, av);
4217
		ExitPostmaster(0);
4218
	}
B
Bruce Momjian 已提交
4219
#endif   /* EXEC_BACKEND */
4220

4221 4222
	if (pid < 0)
	{
4223 4224
		/* in parent, fork failed */
		int			save_errno = errno;
B
Bruce Momjian 已提交
4225

4226
		errno = save_errno;
4227
		switch (type)
4228
		{
4229
			case StartupProcess:
4230 4231
				ereport(LOG,
						(errmsg("could not fork startup process: %m")));
4232
				break;
4233
			case BgWriterProcess:
J
Jan Wieck 已提交
4234
				ereport(LOG,
B
Bruce Momjian 已提交
4235
				   (errmsg("could not fork background writer process: %m")));
4236
				break;
4237 4238
			case WalWriterProcess:
				ereport(LOG,
B
Bruce Momjian 已提交
4239
						(errmsg("could not fork WAL writer process: %m")));
4240
				break;
4241
			default:
4242 4243
				ereport(LOG,
						(errmsg("could not fork process: %m")));
4244 4245
				break;
		}
B
Bruce Momjian 已提交
4246

4247
		/*
B
Bruce Momjian 已提交
4248 4249
		 * fork failure is fatal during startup, but there's no need to choke
		 * immediately if starting other child types fails.
4250
		 */
4251
		if (type == StartupProcess)
4252 4253
			ExitPostmaster(1);
		return 0;
4254 4255
	}

T
Tom Lane 已提交
4256
	/*
4257
	 * in parent, successful fork
T
Tom Lane 已提交
4258
	 */
4259
	return pid;
4260
}
4261

4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273
/*
 * StartAutovacuumWorker
 *		Start an autovac worker process.
 *
 * This function is here because it enters the resulting PID into the
 * postmaster's private backends list.
 *
 * NB -- this code very roughly matches BackendStartup.
 */
static void
StartAutovacuumWorker(void)
{
B
Bruce Momjian 已提交
4274
	Backend    *bn;
4275 4276

	/*
4277 4278 4279 4280 4281
	 * If not in condition to run a process, don't try, but handle it like a
	 * fork failure.  This does not normally happen, since the signal is only
	 * supposed to be sent by autovacuum launcher when it's OK to do it, but
	 * we have to check to avoid race-condition problems during DB state
	 * changes.
4282
	 */
4283
	if (canAcceptConnections() == CAC_OK)
4284
	{
4285 4286
		bn = (Backend *) malloc(sizeof(Backend));
		if (bn)
4287
		{
4288
			/*
4289 4290 4291 4292
			 * Compute the cancel key that will be assigned to this session.
			 * We probably don't need cancel keys for autovac workers, but
			 * we'd better have something random in the field to prevent
			 * unfriendly people from sending cancels to them.
4293 4294 4295 4296 4297 4298 4299 4300
			 */
			MyCancelKey = PostmasterRandom();
			bn->cancel_key = MyCancelKey;

			/* Autovac workers are not dead_end and need a child slot */
			bn->dead_end = false;
			bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot();

4301 4302 4303 4304
			bn->pid = StartAutoVacWorker();
			if (bn->pid > 0)
			{
				bn->is_autovacuum = true;
T
Tom Lane 已提交
4305 4306
				DLInitElem(&bn->elem, bn);
				DLAddHead(BackendList, &bn->elem);
4307
#ifdef EXEC_BACKEND
4308
				ShmemBackendArrayAdd(bn);
4309
#endif
4310 4311 4312
				/* all OK */
				return;
			}
4313

4314 4315 4316 4317
			/*
			 * fork failed, fall through to report -- actual error message was
			 * logged by StartAutoVacWorker
			 */
4318
			(void) ReleasePostmasterChildSlot(bn->child_slot);
4319 4320 4321 4322 4323 4324
			free(bn);
		}
		else
			ereport(LOG,
					(errcode(ERRCODE_OUT_OF_MEMORY),
					 errmsg("out of memory")));
4325
	}
4326

4327
	/*
B
Bruce Momjian 已提交
4328 4329
	 * Report the failure to the launcher, if it's running.  (If it's not, we
	 * might not even be connected to shared memory, so don't try to call
4330 4331 4332 4333 4334
	 * AutoVacWorkerFailed.)  Note that we also need to signal it so that it
	 * responds to the condition, but we don't do that here, instead waiting
	 * for ServerLoop to do it.  This way we avoid a ping-pong signalling in
	 * quick succession between the autovac launcher and postmaster in case
	 * things get ugly.
4335
	 */
4336
	if (AutoVacPID != 0)
4337 4338
	{
		AutoVacWorkerFailed();
4339
		avlauncher_needs_signal = true;
4340
	}
4341
}
4342

T
Tatsuo Ishii 已提交
4343
/*
4344
 * Create the opts file
T
Tatsuo Ishii 已提交
4345
 */
4346
static bool
4347
CreateOptsFile(int argc, char *argv[], char *fullprogname)
4348
{
B
Bruce Momjian 已提交
4349
	FILE	   *fp;
4350
	int			i;
4351

4352
#define OPTS_FILE	"postmaster.opts"
4353

4354
	if ((fp = fopen(OPTS_FILE, "w")) == NULL)
4355
	{
4356
		elog(LOG, "could not create file \"%s\": %m", OPTS_FILE);
4357
		return false;
4358 4359
	}

4360 4361
	fprintf(fp, "%s", fullprogname);
	for (i = 1; i < argc; i++)
4362
		fprintf(fp, " \"%s\"", argv[i]);
4363
	fputs("\n", fp);
4364

4365
	if (fclose(fp))
4366
	{
4367
		elog(LOG, "could not write file \"%s\": %m", OPTS_FILE);
4368
		return false;
4369 4370
	}

4371
	return true;
4372
}
4373

4374

4375 4376 4377 4378 4379 4380
/*
 * MaxLivePostmasterChildren
 *
 * Returns how many entries the per-child-process arrays need (the
 * PMChildFlags array and, under EXEC_BACKEND, the ShmemBackendArray).
 * Those arrays cover regular backends and autovac workers; special children
 * and dead_end children are not included.  That lets the arrays have a
 * fixed maximum size, namely the same too-many-children limit that
 * canAcceptConnections() enforces.  The precise figure is not critical,
 * provided it is larger than MaxBackends.
 */
int
MaxLivePostmasterChildren(void)
{
	int			nslots = 2 * MaxBackends;

	return nslots;
}


4393 4394 4395
#ifdef EXEC_BACKEND

/*
4396
 * The following need to be available to the save/restore_backend_variables
4397 4398 4399 4400
 * functions
 */
extern slock_t *ShmemLock;
extern LWLock *LWLockArray;
B
Bruce Momjian 已提交
4401
extern slock_t *ProcStructLock;
4402
extern PROC_HDR *ProcGlobal;
4403
extern PGPROC *AuxiliaryProcs;
4404
extern PMSignalData *PMSignalState;
4405
extern int	pgStatSock;
4406 4407

#ifndef WIN32
4408
#define write_inheritable_socket(dest, src, childpid) ((*(dest) = (src)), true)
4409 4410
#define read_inheritable_socket(dest, src) (*(dest) = *(src))
#else
4411 4412
static bool write_duplicated_handle(HANDLE *dest, HANDLE src, HANDLE child);
static bool write_inheritable_socket(InheritableSocket *dest, SOCKET src,
B
Bruce Momjian 已提交
4413
						 pid_t childPid);
4414
static void read_inheritable_socket(SOCKET *dest, InheritableSocket *src);
4415
#endif
4416 4417


4418 4419
/* Save critical backend variables into the BackendParameters struct */
#ifndef WIN32
4420
static bool
4421
save_backend_variables(BackendParameters *param, Port *port)
4422 4423
#else
static bool
4424
save_backend_variables(BackendParameters *param, Port *port,
4425 4426
					   HANDLE childProcess, pid_t childPid)
#endif
4427
{
4428
	memcpy(&param->port, port, sizeof(Port));
4429 4430
	if (!write_inheritable_socket(&param->portsocket, port->sock, childPid))
		return false;
B
Bruce Momjian 已提交
4431

4432
	strlcpy(param->DataDir, DataDir, MAXPGPATH);
4433

4434
	memcpy(&param->ListenSocket, &ListenSocket, sizeof(ListenSocket));
B
Bruce Momjian 已提交
4435

4436
	param->MyCancelKey = MyCancelKey;
4437
	param->MyPMChildSlot = MyPMChildSlot;
4438

4439 4440
	param->UsedShmemSegID = UsedShmemSegID;
	param->UsedShmemSegAddr = UsedShmemSegAddr;
4441

4442 4443 4444
	param->ShmemLock = ShmemLock;
	param->ShmemVariableCache = ShmemVariableCache;
	param->ShmemBackendArray = ShmemBackendArray;
4445

4446 4447
	param->LWLockArray = LWLockArray;
	param->ProcStructLock = ProcStructLock;
4448
	param->ProcGlobal = ProcGlobal;
4449
	param->AuxiliaryProcs = AuxiliaryProcs;
4450
	param->PMSignalState = PMSignalState;
4451 4452
	if (!write_inheritable_socket(&param->pgStatSock, pgStatSock, childPid))
		return false;
4453

4454
	param->PostmasterPid = PostmasterPid;
4455
	param->PgStartTime = PgStartTime;
4456
	param->PgReloadTime = PgReloadTime;
4457

4458 4459
	param->redirection_done = redirection_done;

4460 4461
#ifdef WIN32
	param->PostmasterHandle = PostmasterHandle;
4462
	if (!write_duplicated_handle(&param->initial_signal_pipe,
4463
							pgwin32_create_signal_listener(childPid),
4464 4465
							childProcess))
		return false;
4466
#endif
4467

4468
	memcpy(&param->syslogPipe, &syslogPipe, sizeof(syslogPipe));
4469

4470
	strlcpy(param->my_exec_path, my_exec_path, MAXPGPATH);
4471

4472
	strlcpy(param->pkglib_path, pkglib_path, MAXPGPATH);
4473

4474
	strlcpy(param->ExtraOptions, ExtraOptions, MAXPGPATH);
4475

4476 4477
	return true;
}
4478

4479

4480 4481 4482 4483 4484
#ifdef WIN32
/*
 * Duplicate a handle for usage in a child process, and write the child
 * process instance of the handle to the parameter file.
 */
4485
static bool
4486
write_duplicated_handle(HANDLE *dest, HANDLE src, HANDLE childProcess)
4487
{
B
Bruce Momjian 已提交
4488
	HANDLE		hChild = INVALID_HANDLE_VALUE;
4489 4490 4491 4492 4493 4494 4495 4496

	if (!DuplicateHandle(GetCurrentProcess(),
						 src,
						 childProcess,
						 &hChild,
						 0,
						 TRUE,
						 DUPLICATE_CLOSE_SOURCE | DUPLICATE_SAME_ACCESS))
4497 4498
	{
		ereport(LOG,
4499 4500
				(errmsg_internal("could not duplicate handle to be written to backend parameter file: error code %d",
								 (int) GetLastError())));
4501 4502
		return false;
	}
4503 4504

	*dest = hChild;
4505
	return true;
4506
}
4507

4508 4509 4510 4511 4512 4513 4514
/*
 * Duplicate a socket for usage in a child process, and write the resulting
 * structure to the parameter file.
 * This is required because a number of LSPs (Layered Service Providers) very
 * common on Windows (antivirus, firewalls, download managers etc) break
 * straight socket inheritance.
 */
4515
static bool
4516
write_inheritable_socket(InheritableSocket *dest, SOCKET src, pid_t childpid)
4517 4518 4519 4520 4521 4522
{
	dest->origsocket = src;
	if (src != 0 && src != -1)
	{
		/* Actual socket */
		if (WSADuplicateSocket(src, childpid, &dest->wsainfo) != 0)
4523 4524
		{
			ereport(LOG,
4525 4526
					(errmsg("could not duplicate socket %d for use in backend: error code %d",
							src, WSAGetLastError())));
4527 4528
			return false;
		}
4529
	}
4530
	return true;
4531
}
4532

4533 4534 4535 4536
/*
 * Read a duplicate socket structure back, and get the socket descriptor.
 */
static void
4537
read_inheritable_socket(SOCKET *dest, InheritableSocket *src)
4538
{
B
Bruce Momjian 已提交
4539
	SOCKET		s;
4540

B
Bruce Momjian 已提交
4541
	if (src->origsocket == -1 || src->origsocket == 0)
4542
	{
4543 4544
		/* Not a real socket! */
		*dest = src->origsocket;
4545
	}
4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561
	else
	{
		/* Actual socket, so create from structure */
		s = WSASocket(FROM_PROTOCOL_INFO,
					  FROM_PROTOCOL_INFO,
					  FROM_PROTOCOL_INFO,
					  &src->wsainfo,
					  0,
					  0);
		if (s == INVALID_SOCKET)
		{
			write_stderr("could not create inherited socket: error code %d\n",
						 WSAGetLastError());
			exit(1);
		}
		*dest = s;
4562

4563
		/*
B
Bruce Momjian 已提交
4564 4565 4566
		 * To make sure we don't get two references to the same socket, close
		 * the original one. (This would happen when inheritance actually
		 * works..
4567 4568 4569
		 */
		closesocket(src->origsocket);
	}
4570
}
4571
#endif
4572

4573
static void
4574
read_backend_variables(char *id, Port *port)
4575
{
4576 4577
	BackendParameters param;

4578 4579
#ifndef WIN32
	/* Non-win32 implementation reads from file */
B
Bruce Momjian 已提交
4580
	FILE	   *fp;
4581 4582

	/* Open file */
4583
	fp = AllocateFile(id, PG_BINARY_R);
4584
	if (!fp)
4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607
	{
		write_stderr("could not read from backend variables file \"%s\": %s\n",
					 id, strerror(errno));
		exit(1);
	}

	if (fread(&param, sizeof(param), 1, fp) != 1)
	{
		write_stderr("could not read from backend variables file \"%s\": %s\n",
					 id, strerror(errno));
		exit(1);
	}

	/* Release file */
	FreeFile(fp);
	if (unlink(id) != 0)
	{
		write_stderr("could not remove file \"%s\": %s\n",
					 id, strerror(errno));
		exit(1);
	}
#else
	/* Win32 version uses mapped file */
B
Bruce Momjian 已提交
4608
	HANDLE		paramHandle;
4609
	BackendParameters *paramp;
4610

B
Bruce Momjian 已提交
4611
	paramHandle = (HANDLE) atol(id);
4612 4613
	paramp = MapViewOfFile(paramHandle, FILE_MAP_READ, 0, 0, 0);
	if (!paramp)
4614 4615 4616 4617 4618
	{
		write_stderr("could not map view of backend variables: error code %d\n",
					 (int) GetLastError());
		exit(1);
	}
4619

4620
	memcpy(&param, paramp, sizeof(BackendParameters));
4621

4622
	if (!UnmapViewOfFile(paramp))
4623 4624 4625 4626 4627
	{
		write_stderr("could not unmap view of backend variables: error code %d\n",
					 (int) GetLastError());
		exit(1);
	}
4628

4629 4630 4631 4632 4633 4634 4635
	if (!CloseHandle(paramHandle))
	{
		write_stderr("could not close handle to backend parameter variables: error code %d\n",
					 (int) GetLastError());
		exit(1);
	}
#endif
4636 4637

	restore_backend_variables(&param, port);
4638 4639 4640 4641
}

/* Restore critical backend variables from the BackendParameters struct */
static void
4642
restore_backend_variables(BackendParameters *param, Port *port)
4643 4644 4645 4646 4647
{
	memcpy(port, &param->port, sizeof(Port));
	read_inheritable_socket(&port->sock, &param->portsocket);

	SetDataDir(param->DataDir);
4648

4649
	memcpy(&ListenSocket, &param->ListenSocket, sizeof(ListenSocket));
4650

4651
	MyCancelKey = param->MyCancelKey;
4652
	MyPMChildSlot = param->MyPMChildSlot;
4653 4654 4655 4656 4657 4658 4659 4660 4661 4662

	UsedShmemSegID = param->UsedShmemSegID;
	UsedShmemSegAddr = param->UsedShmemSegAddr;

	ShmemLock = param->ShmemLock;
	ShmemVariableCache = param->ShmemVariableCache;
	ShmemBackendArray = param->ShmemBackendArray;

	LWLockArray = param->LWLockArray;
	ProcStructLock = param->ProcStructLock;
4663
	ProcGlobal = param->ProcGlobal;
4664
	AuxiliaryProcs = param->AuxiliaryProcs;
4665
	PMSignalState = param->PMSignalState;
4666 4667 4668
	read_inheritable_socket(&pgStatSock, &param->pgStatSock);

	PostmasterPid = param->PostmasterPid;
4669
	PgStartTime = param->PgStartTime;
4670
	PgReloadTime = param->PgReloadTime;
4671

4672 4673
	redirection_done = param->redirection_done;

4674
#ifdef WIN32
4675 4676
	PostmasterHandle = param->PostmasterHandle;
	pgwin32_initial_signal_pipe = param->initial_signal_pipe;
4677
#endif
4678

4679
	memcpy(&syslogPipe, &param->syslogPipe, sizeof(syslogPipe));
4680

4681
	strlcpy(my_exec_path, param->my_exec_path, MAXPGPATH);
4682

4683
	strlcpy(pkglib_path, param->pkglib_path, MAXPGPATH);
4684

4685
	strlcpy(ExtraOptions, param->ExtraOptions, MAXPGPATH);
4686 4687
}

4688

4689
Size
B
Bruce Momjian 已提交
4690
ShmemBackendArraySize(void)
4691
{
4692
	return mul_size(MaxLivePostmasterChildren(), sizeof(Backend));
4693 4694
}

B
Bruce Momjian 已提交
4695 4696
void
ShmemBackendArrayAllocation(void)
4697
{
4698
	Size		size = ShmemBackendArraySize();
B
Bruce Momjian 已提交
4699 4700

	ShmemBackendArray = (Backend *) ShmemAlloc(size);
4701
	/* Mark all slots as empty */
4702 4703 4704
	memset(ShmemBackendArray, 0, size);
}

B
Bruce Momjian 已提交
4705 4706
/*
 * ShmemBackendArrayAdd
 *		Copy *bn into the shared Backend array.
 *
 * The destination is the slot at index bn->child_slot - 1.
 */
static void
ShmemBackendArrayAdd(Backend *bn)
{
	Backend    *slot = &ShmemBackendArray[bn->child_slot - 1];

	/* The slot matching this child's PMChildSlot is expected to be unused */
	Assert(slot->pid == 0);
	*slot = *bn;
}

B
Bruce Momjian 已提交
4715
static void
4716
ShmemBackendArrayRemove(Backend *bn)
4717
{
4718
	int			i = bn->child_slot - 1;
4719

4720 4721 4722
	Assert(ShmemBackendArray[i].pid == bn->pid);
	/* Mark the slot as empty */
	ShmemBackendArray[i].pid = 0;
4723
}
B
Bruce Momjian 已提交
4724
#endif   /* EXEC_BACKEND */
4725

4726 4727 4728

#ifdef WIN32

B
Bruce Momjian 已提交
4729 4730
static pid_t
win32_waitpid(int *exitstatus)
4731
{
B
Bruce Momjian 已提交
4732 4733 4734
	DWORD		dwd;
	ULONG_PTR	key;
	OVERLAPPED *ovl;
4735

B
Bruce Momjian 已提交
4736
	/*
B
Bruce Momjian 已提交
4737 4738
	 * Check if there are any dead children. If there are, return the pid of
	 * the first one that died.
B
Bruce Momjian 已提交
4739
	 */
4740
	if (GetQueuedCompletionStatus(win32ChildQueue, &dwd, &key, &ovl, 0))
4741
	{
B
Bruce Momjian 已提交
4742
		*exitstatus = (int) key;
4743
		return dwd;
4744 4745 4746 4747 4748
	}

	return -1;
}

4749
/*
4750 4751
 * Note! Code below executes on a thread pool! All operations must
 * be thread safe! Note that elog() and friends must *not* be used.
4752
 */
4753 4754
static void WINAPI
pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired)
B
Bruce Momjian 已提交
4755
{
B
Bruce Momjian 已提交
4756
	win32_deadchild_waitinfo *childinfo = (win32_deadchild_waitinfo *) lpParameter;
4757
	DWORD		exitcode;
B
Bruce Momjian 已提交
4758

4759
	if (TimerOrWaitFired)
B
Bruce Momjian 已提交
4760 4761
		return;					/* timeout. Should never happen, since we use
								 * INFINITE as timeout value. */
4762

B
Bruce Momjian 已提交
4763 4764 4765 4766
	/*
	 * Remove handle from wait - required even though it's set to wait only
	 * once
	 */
4767 4768 4769 4770 4771 4772 4773
	UnregisterWaitEx(childinfo->waitHandle, NULL);

	if (!GetExitCodeProcess(childinfo->procHandle, &exitcode))
	{
		/*
		 * Should never happen. Inform user and set a fixed exitcode.
		 */
P
Peter Eisentraut 已提交
4774
		write_stderr("could not read exit code for process\n");
4775 4776 4777
		exitcode = 255;
	}

B
Bruce Momjian 已提交
4778
	if (!PostQueuedCompletionStatus(win32ChildQueue, childinfo->procId, (ULONG_PTR) exitcode, NULL))
4779 4780
		write_stderr("could not post child completion status\n");

B
Bruce Momjian 已提交
4781 4782 4783 4784
	/*
	 * Handle is per-process, so we close it here instead of in the
	 * originating thread
	 */
4785 4786
	CloseHandle(childinfo->procHandle);

B
Bruce Momjian 已提交
4787 4788 4789 4790
	/*
	 * Free struct that was allocated before the call to
	 * RegisterWaitForSingleObject()
	 */
4791 4792 4793 4794
	free(childinfo);

	/* Queue SIGCHLD signal */
	pg_queue_signal(SIGCHLD);
4795 4796
}

B
Bruce Momjian 已提交
4797
#endif   /* WIN32 */