/*-------------------------------------------------------------------------
 *
 * deadlock.c
 *	  POSTGRES deadlock detection code
 *
 * See src/backend/storage/lmgr/README for a description of the deadlock
 * detection and resolution algorithms.
 *
 *
 * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/storage/lmgr/deadlock.c
 *
 *	Interface:
 *
 *	DeadLockCheck()
 *	DeadLockReport()
 *	RememberSimpleDeadLock()
 *	InitDeadLockChecking()
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "miscadmin.h"
#include "pg_trace.h"
#include "pgstat.h"
#include "storage/lmgr.h"
#include "storage/proc.h"
#include "utils/memutils.h"


/* One edge in the waits-for graph */
B
Bruce Momjian 已提交
37 38
typedef struct
{
J
Jan Wieck 已提交
39 40
	PGPROC	   *waiter;			/* the waiting process */
	PGPROC	   *blocker;		/* the process it is waiting for */
41
	LOCK	   *lock;			/* the lock it is waiting for */
B
Bruce Momjian 已提交
42 43
	int			pred;			/* workspace for TopoSort */
	int			link;			/* workspace for TopoSort */
44 45 46
} EDGE;

/* One potential reordering of a lock's wait queue */
B
Bruce Momjian 已提交
47 48 49
typedef struct
{
	LOCK	   *lock;			/* the lock whose wait queue is described */
J
Jan Wieck 已提交
50
	PGPROC	  **procs;			/* array of PGPROC *'s in new wait order */
B
Bruce Momjian 已提交
51
	int			nProcs;
52 53
} WAIT_ORDER;

54
/*
B
Bruce Momjian 已提交
55
 * Information saved about each edge in a detected deadlock cycle.  This
56 57
 * is used to print a diagnostic message upon failure.
 *
58 59 60
 * Note: because we want to examine this info after releasing the lock
 * manager's partition locks, we can't just store LOCK and PGPROC pointers;
 * we must extract out all the info we want to be able to print.
61 62 63 64 65 66
 */
typedef struct
{
	LOCKTAG		locktag;		/* ID of awaited lock object */
	LOCKMODE	lockmode;		/* type of lock we're waiting for */
	int			pid;			/* PID of blocked backend */
67
} DEADLOCK_INFO;
68

69

J
Jan Wieck 已提交
70
static bool DeadLockCheckRecurse(PGPROC *proc);
71
static int	TestConfiguration(PGPROC *startProc);
J
Jan Wieck 已提交
72
static bool FindLockCycle(PGPROC *checkProc,
B
Bruce Momjian 已提交
73
			  EDGE *softEdges, int *nSoftEdges);
74
static bool FindLockCycleRecurse(PGPROC *checkProc, int depth,
B
Bruce Momjian 已提交
75
					 EDGE *softEdges, int *nSoftEdges);
76 77 78
static bool FindLockCycleRecurseMember(PGPROC *checkProc,
						   PGPROC *checkProcLeader,
						   int depth, EDGE *softEdges, int *nSoftEdges);
79 80
static bool ExpandConstraints(EDGE *constraints, int nConstraints);
static bool TopoSort(LOCK *lock, EDGE *constraints, int nConstraints,
J
Jan Wieck 已提交
81
		 PGPROC **ordering);
B
Bruce Momjian 已提交
82

83 84 85 86 87 88 89 90 91 92
#ifdef DEBUG_DEADLOCK
static void PrintLockQueue(LOCK *lock, const char *info);
#endif


/*
 * Working space for the deadlock detector
 */

/* Workspace for FindLockCycle */
B
Bruce Momjian 已提交
93
static PGPROC **visitedProcs;	/* Array of visited procs */
B
Bruce Momjian 已提交
94 95
static int	nVisitedProcs;

96
/* Workspace for TopoSort */
J
Jan Wieck 已提交
97
static PGPROC **topoProcs;		/* Array of not-yet-output procs */
98 99
static int *beforeConstraints;	/* Counts of remaining before-constraints */
static int *afterConstraints;	/* List head for after-constraints */
B
Bruce Momjian 已提交
100

101 102
/* Output area for ExpandConstraints */
static WAIT_ORDER *waitOrders;	/* Array of proposed queue rearrangements */
B
Bruce Momjian 已提交
103
static int	nWaitOrders;
B
Bruce Momjian 已提交
104
static PGPROC **waitOrderProcs; /* Space for waitOrders queue contents */
B
Bruce Momjian 已提交
105

106 107
/* Current list of constraints being considered */
static EDGE *curConstraints;
B
Bruce Momjian 已提交
108 109 110
static int	nCurConstraints;
static int	maxCurConstraints;

111 112
/* Storage space for results from FindLockCycle */
static EDGE *possibleConstraints;
B
Bruce Momjian 已提交
113 114
static int	nPossibleConstraints;
static int	maxPossibleConstraints;
115 116
static DEADLOCK_INFO *deadlockDetails;
static int	nDeadlockDetails;
117

118
/* PGPROC pointer of any blocking autovacuum worker found */
B
Bruce Momjian 已提交
119
static PGPROC *blocking_autovacuum_proc = NULL;
120

121 122 123 124 125

/*
 * InitDeadLockChecking -- initialize deadlock checker during backend startup
 *
 * This does per-backend initialization of the deadlock checker; primarily,
B
Bruce Momjian 已提交
126
 * allocation of working memory for DeadLockCheck.  We do this per-backend
127 128
 * since there's no percentage in making the kernel do copy-on-write
 * inheritance of workspace from the postmaster.  We want to allocate the
129 130 131
 * space at startup because (a) the deadlock checker might be invoked when
 * there's no free memory left, and (b) the checker is normally run inside a
 * signal handler, which is a very dangerous place to invoke palloc from.
132 133 134 135
 */
void
InitDeadLockChecking(void)
{
B
Bruce Momjian 已提交
136
	MemoryContext oldcxt;
137 138 139 140 141

	/* Make sure allocations are permanent */
	oldcxt = MemoryContextSwitchTo(TopMemoryContext);

	/*
B
Bruce Momjian 已提交
142 143
	 * FindLockCycle needs at most MaxBackends entries in visitedProcs[] and
	 * deadlockDetails[].
144
	 */
J
Jan Wieck 已提交
145
	visitedProcs = (PGPROC **) palloc(MaxBackends * sizeof(PGPROC *));
146
	deadlockDetails = (DEADLOCK_INFO *) palloc(MaxBackends * sizeof(DEADLOCK_INFO));
147 148

	/*
B
Bruce Momjian 已提交
149 150
	 * TopoSort needs to consider at most MaxBackends wait-queue entries, and
	 * it needn't run concurrently with FindLockCycle.
151 152 153 154 155 156 157
	 */
	topoProcs = visitedProcs;	/* re-use this space */
	beforeConstraints = (int *) palloc(MaxBackends * sizeof(int));
	afterConstraints = (int *) palloc(MaxBackends * sizeof(int));

	/*
	 * We need to consider rearranging at most MaxBackends/2 wait queues
B
Bruce Momjian 已提交
158 159 160
	 * (since it takes at least two waiters in a queue to create a soft edge),
	 * and the expanded form of the wait queues can't involve more than
	 * MaxBackends total waiters.
161
	 */
T
Tom Lane 已提交
162
	waitOrders = (WAIT_ORDER *)
163
		palloc((MaxBackends / 2) * sizeof(WAIT_ORDER));
J
Jan Wieck 已提交
164
	waitOrderProcs = (PGPROC **) palloc(MaxBackends * sizeof(PGPROC *));
165 166

	/*
B
Bruce Momjian 已提交
167 168 169 170 171 172
	 * Allow at most MaxBackends distinct constraints in a configuration. (Is
	 * this enough?  In practice it seems it should be, but I don't quite see
	 * how to prove it.  If we run out, we might fail to find a workable wait
	 * queue rearrangement even though one exists.)  NOTE that this number
	 * limits the maximum recursion depth of DeadLockCheckRecurse. Making it
	 * really big might potentially allow a stack-overflow problem.
173 174 175 176 177 178
	 */
	maxCurConstraints = MaxBackends;
	curConstraints = (EDGE *) palloc(maxCurConstraints * sizeof(EDGE));

	/*
	 * Allow up to 3*MaxBackends constraints to be saved without having to
B
Bruce Momjian 已提交
179 180 181 182 183
	 * re-run TestConfiguration.  (This is probably more than enough, but we
	 * can survive if we run low on space by doing excess runs of
	 * TestConfiguration to re-compute constraint lists each time needed.) The
	 * last MaxBackends entries in possibleConstraints[] are reserved as
	 * output workspace for FindLockCycle.
184 185 186 187 188 189 190 191 192 193 194 195 196
	 */
	maxPossibleConstraints = MaxBackends * 4;
	possibleConstraints =
		(EDGE *) palloc(maxPossibleConstraints * sizeof(EDGE));

	MemoryContextSwitchTo(oldcxt);
}

/*
 * DeadLockCheck -- Checks for deadlocks for a given process
 *
 * This code looks for deadlocks involving the given process.  If any
 * are found, it tries to rearrange lock wait queues to resolve the
197 198
 * deadlock.  If resolution is impossible, return DS_HARD_DEADLOCK ---
 * the caller is then expected to abort the given proc's transaction.
199
 *
200
 * Caller must already have locked all partitions of the lock tables.
201 202 203
 *
 * On failure, deadlock details are recorded in deadlockDetails[] for
 * subsequent printing by DeadLockReport().  That activity is separate
204 205
 * because (a) we don't want to do it while holding all those LWLocks,
 * and (b) we are typically invoked inside a signal handler.
206
 */
207
DeadLockState
J
Jan Wieck 已提交
208
DeadLockCheck(PGPROC *proc)
209 210 211 212 213 214 215 216 217
{
	int			i,
				j;

	/* Initialize to "no constraints" */
	nCurConstraints = 0;
	nPossibleConstraints = 0;
	nWaitOrders = 0;

218 219 220
	/* Initialize to not blocked by an autovacuum worker */
	blocking_autovacuum_proc = NULL;

221 222
	/* Search for deadlocks and possible fixes */
	if (DeadLockCheckRecurse(proc))
223 224 225 226 227
	{
		/*
		 * Call FindLockCycle one more time, to record the correct
		 * deadlockDetails[] for the basic state with no rearrangements.
		 */
B
Bruce Momjian 已提交
228
		int			nSoftEdges;
229

230 231
		TRACE_POSTGRESQL_DEADLOCK_FOUND();

232 233
		nWaitOrders = 0;
		if (!FindLockCycle(proc, possibleConstraints, &nSoftEdges))
234
			elog(FATAL, "deadlock seems to have disappeared");
235

236
		return DS_HARD_DEADLOCK;	/* cannot find a non-deadlocked state */
237
	}
238 239 240 241

	/* Apply any needed rearrangements of wait queues */
	for (i = 0; i < nWaitOrders; i++)
	{
B
Bruce Momjian 已提交
242
		LOCK	   *lock = waitOrders[i].lock;
J
Jan Wieck 已提交
243
		PGPROC	  **procs = waitOrders[i].procs;
B
Bruce Momjian 已提交
244
		int			nProcs = waitOrders[i].nProcs;
245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263
		PROC_QUEUE *waitQueue = &(lock->waitProcs);

		Assert(nProcs == waitQueue->size);

#ifdef DEBUG_DEADLOCK
		PrintLockQueue(lock, "DeadLockCheck:");
#endif

		/* Reset the queue and re-add procs in the desired order */
		ProcQueueInit(waitQueue);
		for (j = 0; j < nProcs; j++)
		{
			SHMQueueInsertBefore(&(waitQueue->links), &(procs[j]->links));
			waitQueue->size++;
		}

#ifdef DEBUG_DEADLOCK
		PrintLockQueue(lock, "rearranged to:");
#endif
264 265 266

		/* See if any waiters for the lock can be woken up now */
		ProcLockWakeup(GetLocksMethodTable(lock), lock);
267
	}
268

269
	/* Return code tells caller if we had to escape a deadlock or not */
270 271
	if (nWaitOrders > 0)
		return DS_SOFT_DEADLOCK;
272 273
	else if (blocking_autovacuum_proc != NULL)
		return DS_BLOCKED_BY_AUTOVACUUM;
274
	else
275
		return DS_NO_DEADLOCK;
276 277
}

278 279 280 281 282 283 284 285
/*
 * Return the PGPROC of the autovacuum that's blocking a process.
 *
 * We reset the saved pointer as soon as we pass it back.
 */
PGPROC *
GetBlockingAutoVacuumPgproc(void)
{
B
Bruce Momjian 已提交
286
	PGPROC	   *ptr;
287 288 289 290 291 292 293

	ptr = blocking_autovacuum_proc;
	blocking_autovacuum_proc = NULL;

	return ptr;
}

294 295 296 297
/*
 * DeadLockCheckRecurse -- recursively search for valid orderings
 *
 * curConstraints[] holds the current set of constraints being considered
B
Bruce Momjian 已提交
298
 * by an outer level of recursion.  Add to this each possible solution
299 300
 * constraint for any cycle detected at this level.
 *
B
Bruce Momjian 已提交
301
 * Returns TRUE if no solution exists.  Returns FALSE if a deadlock-free
302 303 304 305
 * state is attainable, in which case waitOrders[] shows the required
 * rearrangements of lock wait queues (if any).
 */
static bool
J
Jan Wieck 已提交
306
DeadLockCheckRecurse(PGPROC *proc)
307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331
{
	int			nEdges;
	int			oldPossibleConstraints;
	bool		savedList;
	int			i;

	nEdges = TestConfiguration(proc);
	if (nEdges < 0)
		return true;			/* hard deadlock --- no solution */
	if (nEdges == 0)
		return false;			/* good configuration found */
	if (nCurConstraints >= maxCurConstraints)
		return true;			/* out of room for active constraints? */
	oldPossibleConstraints = nPossibleConstraints;
	if (nPossibleConstraints + nEdges + MaxBackends <= maxPossibleConstraints)
	{
		/* We can save the edge list in possibleConstraints[] */
		nPossibleConstraints += nEdges;
		savedList = true;
	}
	else
	{
		/* Not room; will need to regenerate the edges on-the-fly */
		savedList = false;
	}
B
Bruce Momjian 已提交
332

333 334 335 336 337 338 339 340 341
	/*
	 * Try each available soft edge as an addition to the configuration.
	 */
	for (i = 0; i < nEdges; i++)
	{
		if (!savedList && i > 0)
		{
			/* Regenerate the list of possible added constraints */
			if (nEdges != TestConfiguration(proc))
342
				elog(FATAL, "inconsistent results during deadlock check");
343 344
		}
		curConstraints[nCurConstraints] =
B
Bruce Momjian 已提交
345
			possibleConstraints[oldPossibleConstraints + i];
346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370
		nCurConstraints++;
		if (!DeadLockCheckRecurse(proc))
			return false;		/* found a valid solution! */
		/* give up on that added constraint, try again */
		nCurConstraints--;
	}
	nPossibleConstraints = oldPossibleConstraints;
	return true;				/* no solution found */
}


/*--------------------
 * Test a configuration (current set of constraints) for validity.
 *
 * Returns:
 *		0: the configuration is good (no deadlocks)
 *	   -1: the configuration has a hard deadlock or is not self-consistent
 *		>0: the configuration has one or more soft deadlocks
 *
 * In the soft-deadlock case, one of the soft cycles is chosen arbitrarily
 * and a list of its soft edges is returned beginning at
 * possibleConstraints+nPossibleConstraints.  The return value is the
 * number of soft edges.
 *--------------------
 */
371
static int
J
Jan Wieck 已提交
372
TestConfiguration(PGPROC *startProc)
373
{
B
Bruce Momjian 已提交
374 375 376 377
	int			softFound = 0;
	EDGE	   *softEdges = possibleConstraints + nPossibleConstraints;
	int			nSoftEdges;
	int			i;
378 379 380 381 382 383

	/*
	 * Make sure we have room for FindLockCycle's output.
	 */
	if (nPossibleConstraints + MaxBackends > maxPossibleConstraints)
		return -1;
B
Bruce Momjian 已提交
384

385 386 387 388 389 390
	/*
	 * Expand current constraint set into wait orderings.  Fail if the
	 * constraint set is not self-consistent.
	 */
	if (!ExpandConstraints(curConstraints, nCurConstraints))
		return -1;
B
Bruce Momjian 已提交
391

392
	/*
B
Bruce Momjian 已提交
393 394 395
	 * Check for cycles involving startProc or any of the procs mentioned in
	 * constraints.  We check startProc last because if it has a soft cycle
	 * still to be dealt with, we want to deal with that first.
396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426
	 */
	for (i = 0; i < nCurConstraints; i++)
	{
		if (FindLockCycle(curConstraints[i].waiter, softEdges, &nSoftEdges))
		{
			if (nSoftEdges == 0)
				return -1;		/* hard deadlock detected */
			softFound = nSoftEdges;
		}
		if (FindLockCycle(curConstraints[i].blocker, softEdges, &nSoftEdges))
		{
			if (nSoftEdges == 0)
				return -1;		/* hard deadlock detected */
			softFound = nSoftEdges;
		}
	}
	if (FindLockCycle(startProc, softEdges, &nSoftEdges))
	{
		if (nSoftEdges == 0)
			return -1;			/* hard deadlock detected */
		softFound = nSoftEdges;
	}
	return softFound;
}


/*
 * FindLockCycle -- basic check for deadlock cycles
 *
 * Scan outward from the given proc to see if there is a cycle in the
 * waits-for graph that includes this proc.  Return TRUE if a cycle
427
 * is found, else FALSE.  If a cycle is found, we return a list of
428
 * the "soft edges", if any, included in the cycle.  These edges could
429 430 431 432
 * potentially be eliminated by rearranging wait queues.  We also fill
 * deadlockDetails[] with information about the detected cycle; this info
 * is not used by the deadlock algorithm itself, only to print a useful
 * message after failing.
433 434 435
 *
 * Since we need to be able to check hypothetical configurations that would
 * exist after wait queue rearrangement, the routine pays attention to the
B
Bruce Momjian 已提交
436
 * table of hypothetical queue orders in waitOrders[].  These orders will
437 438 439
 * be believed in preference to the actual ordering seen in the locktable.
 */
static bool
J
Jan Wieck 已提交
440
FindLockCycle(PGPROC *checkProc,
441 442 443 444
			  EDGE *softEdges,	/* output argument */
			  int *nSoftEdges)	/* output argument */
{
	nVisitedProcs = 0;
445
	nDeadlockDetails = 0;
446
	*nSoftEdges = 0;
447
	return FindLockCycleRecurse(checkProc, 0, softEdges, nSoftEdges);
448 449 450
}

static bool
J
Jan Wieck 已提交
451
FindLockCycleRecurse(PGPROC *checkProc,
452
					 int depth,
453 454 455 456
					 EDGE *softEdges,	/* output argument */
					 int *nSoftEdges)	/* output argument */
{
	int			i;
457 458 459 460 461 462 463 464
	dlist_iter	iter;

	/*
	 * If this process is a lock group member, check the leader instead. (Note
	 * that we might be the leader, in which case this is a no-op.)
	 */
	if (checkProc->lockGroupLeader != NULL)
		checkProc = checkProc->lockGroupLeader;
465 466 467 468 469 470 471 472 473 474

	/*
	 * Have we already seen this proc?
	 */
	for (i = 0; i < nVisitedProcs; i++)
	{
		if (visitedProcs[i] == checkProc)
		{
			/* If we return to starting point, we have a deadlock cycle */
			if (i == 0)
475 476
			{
				/*
B
Bruce Momjian 已提交
477 478
				 * record total length of cycle --- outer levels will now fill
				 * deadlockDetails[]
479 480 481 482
				 */
				Assert(depth <= MaxBackends);
				nDeadlockDetails = depth;

483
				return true;
484
			}
B
Bruce Momjian 已提交
485

486
			/*
B
Bruce Momjian 已提交
487 488
			 * Otherwise, we have a cycle but it does not include the start
			 * point, so say "no deadlock".
489 490 491 492 493 494 495
			 */
			return false;
		}
	}
	/* Mark proc as seen */
	Assert(nVisitedProcs < MaxBackends);
	visitedProcs[nVisitedProcs++] = checkProc;
B
Bruce Momjian 已提交
496

497
	/*
498 499 500 501 502 503 504 505 506 507 508 509 510 511
	 * If the process is waiting, there is an outgoing waits-for edge to each
	 * process that blocks it.
	 */
	if (checkProc->links.next != NULL && checkProc->waitLock != NULL &&
		FindLockCycleRecurseMember(checkProc, checkProc, depth, softEdges,
								   nSoftEdges))
		return true;

	/*
	 * If the process is not waiting, there could still be outgoing waits-for
	 * edges if it is part of a lock group, because other members of the lock
	 * group might be waiting even though this process is not.  (Given lock
	 * groups {A1, A2} and {B1, B2}, if A1 waits for B1 and B2 waits for A2,
	 * that is a deadlock even neither of B1 and A2 are waiting for anything.)
512
	 */
513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548
	dlist_foreach(iter, &checkProc->lockGroupMembers)
	{
		PGPROC	   *memberProc;

		memberProc = dlist_container(PGPROC, lockGroupLink, iter.cur);

		if (memberProc->links.next != NULL && memberProc->waitLock != NULL &&
			memberProc != checkProc &&
		  FindLockCycleRecurseMember(memberProc, checkProc, depth, softEdges,
									 nSoftEdges))
			return true;
	}

	return false;
}

static bool
FindLockCycleRecurseMember(PGPROC *checkProc,
						   PGPROC *checkProcLeader,
						   int depth,
						   EDGE *softEdges,		/* output argument */
						   int *nSoftEdges)		/* output argument */
{
	PGPROC	   *proc;
	LOCK	   *lock = checkProc->waitLock;
	PGXACT	   *pgxact;
	PROCLOCK   *proclock;
	SHM_QUEUE  *procLocks;
	LockMethod	lockMethodTable;
	PROC_QUEUE *waitQueue;
	int			queue_size;
	int			conflictMask;
	int			i;
	int			numLockModes,
				lm;

549
	lockMethodTable = GetLocksMethodTable(lock);
B
Bruce Momjian 已提交
550 551
	numLockModes = lockMethodTable->numLockModes;
	conflictMask = lockMethodTable->conflictTab[checkProc->waitLockMode];
B
Bruce Momjian 已提交
552

553
	/*
B
Bruce Momjian 已提交
554
	 * Scan for procs that already hold conflicting locks.  These are "hard"
B
Bruce Momjian 已提交
555
	 * edges in the waits-for graph.
556
	 */
557
	procLocks = &(lock->procLocks);
558

559
	proclock = (PROCLOCK *) SHMQueueNext(procLocks, procLocks,
B
Bruce Momjian 已提交
560
										 offsetof(PROCLOCK, lockLink));
561

562
	while (proclock)
563
	{
564 565
		PGPROC	   *leader;

566
		proc = proclock->tag.myProc;
567
		pgxact = &ProcGlobal->allPgXact[proc->pgprocno];
568
		leader = proc->lockGroupLeader == NULL ? proc : proc->lockGroupLeader;
569

570 571
		/* A proc never blocks itself or any other lock group member */
		if (leader != checkProcLeader)
572 573 574
		{
			for (lm = 1; lm <= numLockModes; lm++)
			{
575 576
				if ((proclock->holdMask & LOCKBIT_ON(lm)) &&
					(conflictMask & LOCKBIT_ON(lm)))
577 578
				{
					/* This proc hard-blocks checkProc */
B
Bruce Momjian 已提交
579
					if (FindLockCycleRecurse(proc, depth + 1,
580 581 582
											 softEdges, nSoftEdges))
					{
						/* fill deadlockDetails[] */
B
Bruce Momjian 已提交
583
						DEADLOCK_INFO *info = &deadlockDetails[depth];
584 585 586 587 588

						info->locktag = lock->tag;
						info->lockmode = checkProc->waitLockMode;
						info->pid = checkProc->pid;

589
						return true;
590
					}
591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618

					/*
					 * No deadlock here, but see if this proc is an autovacuum
					 * that is directly hard-blocking our own proc.  If so,
					 * report it so that the caller can send a cancel signal
					 * to it, if appropriate.  If there's more than one such
					 * proc, it's indeterminate which one will be reported.
					 *
					 * We don't touch autovacuums that are indirectly blocking
					 * us; it's up to the direct blockee to take action.  This
					 * rule simplifies understanding the behavior and ensures
					 * that an autovacuum won't be canceled with less than
					 * deadlock_timeout grace period.
					 *
					 * Note we read vacuumFlags without any locking.  This is
					 * OK only for checking the PROC_IS_AUTOVACUUM flag,
					 * because that flag is set at process start and never
					 * reset.  There is logic elsewhere to avoid canceling an
					 * autovacuum that is working to prevent XID wraparound
					 * problems (which needs to read a different vacuumFlag
					 * bit), but we don't do that here to avoid grabbing
					 * ProcArrayLock.
					 */
					if (checkProc == MyProc &&
						pgxact->vacuumFlags & PROC_IS_AUTOVACUUM)
						blocking_autovacuum_proc = proc;

					/* We're done looking at this proclock */
619 620 621 622 623
					break;
				}
			}
		}

624
		proclock = (PROCLOCK *) SHMQueueNext(procLocks, &proclock->lockLink,
B
Bruce Momjian 已提交
625
											 offsetof(PROCLOCK, lockLink));
626 627 628 629
	}

	/*
	 * Scan for procs that are ahead of this one in the lock's wait queue.
B
Bruce Momjian 已提交
630 631 632
	 * Those that have conflicting requests soft-block this one.  This must be
	 * done after the hard-block search, since if another proc both hard- and
	 * soft-blocks this one, we want to call it a hard edge.
633
	 *
B
Bruce Momjian 已提交
634 635
	 * If there is a proposed re-ordering of the lock's wait order, use that
	 * rather than the current wait order.
636 637 638 639 640 641 642 643 644 645
	 */
	for (i = 0; i < nWaitOrders; i++)
	{
		if (waitOrders[i].lock == lock)
			break;
	}

	if (i < nWaitOrders)
	{
		/* Use the given hypothetical wait queue order */
J
Jan Wieck 已提交
646
		PGPROC	  **procs = waitOrders[i].procs;
647 648 649 650 651

		queue_size = waitOrders[i].nProcs;

		for (i = 0; i < queue_size; i++)
		{
652 653
			PGPROC	   *leader;

654
			proc = procs[i];
655 656
			leader = proc->lockGroupLeader == NULL ? proc :
				proc->lockGroupLeader;
657

658 659 660 661 662 663 664 665
			/*
			 * TopoSort will always return an ordering with group members
			 * adjacent to each other in the wait queue (see comments
			 * therein). So, as soon as we reach a process in the same lock
			 * group as checkProc, we know we've found all the conflicts that
			 * precede any member of the lock group lead by checkProcLeader.
			 */
			if (leader == checkProcLeader)
666 667 668
				break;

			/* Is there a conflict with this guy's request? */
669
			if ((LOCKBIT_ON(proc->waitLockMode) & conflictMask) != 0)
670 671
			{
				/* This proc soft-blocks checkProc */
B
Bruce Momjian 已提交
672
				if (FindLockCycleRecurse(proc, depth + 1,
673
										 softEdges, nSoftEdges))
674
				{
675
					/* fill deadlockDetails[] */
B
Bruce Momjian 已提交
676
					DEADLOCK_INFO *info = &deadlockDetails[depth];
677 678 679 680 681

					info->locktag = lock->tag;
					info->lockmode = checkProc->waitLockMode;
					info->pid = checkProc->pid;

B
Bruce Momjian 已提交
682
					/*
B
Bruce Momjian 已提交
683
					 * Add this edge to the list of soft edges in the cycle
B
Bruce Momjian 已提交
684
					 */
685
					Assert(*nSoftEdges < MaxBackends);
686 687 688
					softEdges[*nSoftEdges].waiter = checkProcLeader;
					softEdges[*nSoftEdges].blocker = leader;
					softEdges[*nSoftEdges].lock = lock;
689 690 691 692 693 694 695 696
					(*nSoftEdges)++;
					return true;
				}
			}
		}
	}
	else
	{
697 698
		PGPROC	   *lastGroupMember = NULL;

699 700 701
		/* Use the true lock wait queue order */
		waitQueue = &(lock->waitProcs);

702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722
		/*
		 * Find the last member of the lock group that is present in the wait
		 * queue.  Anything after this is not a soft lock conflict. If group
		 * locking is not in use, then we know immediately which process we're
		 * looking for, but otherwise we've got to search the wait queue to
		 * find the last process actually present.
		 */
		if (checkProc->lockGroupLeader == NULL)
			lastGroupMember = checkProc;
		else
		{
			proc = (PGPROC *) waitQueue->links.next;
			queue_size = waitQueue->size;
			while (queue_size-- > 0)
			{
				if (proc->lockGroupLeader == checkProcLeader)
					lastGroupMember = proc;
				proc = (PGPROC *) proc->links.next;
			}
			Assert(lastGroupMember != NULL);
		}
723

724 725 726 727 728
		/*
		 * OK, now rescan (or scan) the queue to identify the soft conflicts.
		 */
		queue_size = waitQueue->size;
		proc = (PGPROC *) waitQueue->links.next;
729 730
		while (queue_size-- > 0)
		{
731 732 733 734 735
			PGPROC	   *leader;

			leader = proc->lockGroupLeader == NULL ? proc :
				proc->lockGroupLeader;

736
			/* Done when we reach the target proc */
737
			if (proc == lastGroupMember)
738 739 740
				break;

			/* Is there a conflict with this guy's request? */
741 742
			if ((LOCKBIT_ON(proc->waitLockMode) & conflictMask) != 0 &&
				leader != checkProcLeader)
743 744
			{
				/* This proc soft-blocks checkProc */
B
Bruce Momjian 已提交
745
				if (FindLockCycleRecurse(proc, depth + 1,
746
										 softEdges, nSoftEdges))
747
				{
748
					/* fill deadlockDetails[] */
B
Bruce Momjian 已提交
749
					DEADLOCK_INFO *info = &deadlockDetails[depth];
750 751 752 753 754

					info->locktag = lock->tag;
					info->lockmode = checkProc->waitLockMode;
					info->pid = checkProc->pid;

B
Bruce Momjian 已提交
755
					/*
B
Bruce Momjian 已提交
756
					 * Add this edge to the list of soft edges in the cycle
B
Bruce Momjian 已提交
757
					 */
758
					Assert(*nSoftEdges < MaxBackends);
759 760 761
					softEdges[*nSoftEdges].waiter = checkProcLeader;
					softEdges[*nSoftEdges].blocker = leader;
					softEdges[*nSoftEdges].lock = lock;
762 763 764 765 766
					(*nSoftEdges)++;
					return true;
				}
			}

767
			proc = (PGPROC *) proc->links.next;
768 769 770 771 772 773 774 775 776 777 778 779 780 781 782
		}
	}

	/*
	 * No conflict detected here.
	 */
	return false;
}


/*
 * ExpandConstraints -- expand a list of constraints into a set of
 *		specific new orderings for affected wait queues
 *
 * Input is a list of soft edges to be reversed.  The output is a list
J
Jan Wieck 已提交
783
 * of nWaitOrders WAIT_ORDER structs in waitOrders[], with PGPROC array
784 785 786 787 788 789 790 791 792 793 794 795 796 797
 * workspace in waitOrderProcs[].
 *
 * Returns TRUE if able to build an ordering that satisfies all the
 * constraints, FALSE if not (there are contradictory constraints).
 */
static bool
ExpandConstraints(EDGE *constraints,
				  int nConstraints)
{
	int			nWaitOrderProcs = 0;
	int			i,
				j;

	nWaitOrders = 0;
B
Bruce Momjian 已提交
798

799
	/*
B
Bruce Momjian 已提交
800
	 * Scan constraint list backwards.  This is because the last-added
B
Bruce Momjian 已提交
801 802
	 * constraint is the only one that could fail, and so we want to test it
	 * for inconsistency first.
803
	 */
B
Bruce Momjian 已提交
804
	for (i = nConstraints; --i >= 0;)
805
	{
806
		LOCK	   *lock = constraints[i].lock;
807 808

		/* Did we already make a list for this lock? */
B
Bruce Momjian 已提交
809
		for (j = nWaitOrders; --j >= 0;)
810 811 812 813 814 815 816 817 818 819 820 821
		{
			if (waitOrders[j].lock == lock)
				break;
		}
		if (j >= 0)
			continue;
		/* No, so allocate a new list */
		waitOrders[nWaitOrders].lock = lock;
		waitOrders[nWaitOrders].procs = waitOrderProcs + nWaitOrderProcs;
		waitOrders[nWaitOrders].nProcs = lock->waitProcs.size;
		nWaitOrderProcs += lock->waitProcs.size;
		Assert(nWaitOrderProcs <= MaxBackends);
B
Bruce Momjian 已提交
822

823
		/*
B
Bruce Momjian 已提交
824 825
		 * Do the topo sort.  TopoSort need not examine constraints after this
		 * one, since they must be for different locks.
826
		 */
B
Bruce Momjian 已提交
827
		if (!TopoSort(lock, constraints, i + 1,
828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850
					  waitOrders[nWaitOrders].procs))
			return false;
		nWaitOrders++;
	}
	return true;
}


/*
 * TopoSort -- topological sort of a wait queue
 *
 * Generate a re-ordering of a lock's wait queue that satisfies given
 * constraints about certain procs preceding others.  (Each such constraint
 * is a fact of a partial ordering.)  Minimize rearrangement of the queue
 * not needed to achieve the partial ordering.
 *
 * This is a lot simpler and slower than, for example, the topological sort
 * algorithm shown in Knuth's Volume 1.  However, Knuth's method doesn't
 * try to minimize the damage to the existing order.  In practice we are
 * not likely to be working with more than a few constraints, so the apparent
 * slowness of the algorithm won't really matter.
 *
 * The initial queue ordering is taken directly from the lock's wait queue.
J
Jan Wieck 已提交
851
 * The output is an array of PGPROC pointers, of length equal to the lock's
852
 * wait queue length (the caller is responsible for providing this space).
B
Bruce Momjian 已提交
853
 * The partial order is specified by an array of EDGE structs.  Each EDGE
854 855 856 857 858 859 860 861 862 863 864
 * is one that we need to reverse, therefore the "waiter" must appear before
 * the "blocker" in the output array.  The EDGE array may well contain
 * edges associated with other locks; these should be ignored.
 *
 * Returns TRUE if able to build an ordering that satisfies all the
 * constraints, FALSE if not (there are contradictory constraints).
 */
static bool
TopoSort(LOCK *lock,
		 EDGE *constraints,
		 int nConstraints,
J
Jan Wieck 已提交
865
		 PGPROC **ordering)		/* output argument */
866 867 868
{
	PROC_QUEUE *waitQueue = &(lock->waitProcs);
	int			queue_size = waitQueue->size;
J
Jan Wieck 已提交
869
	PGPROC	   *proc;
870 871
	int			i,
				j,
872
				jj,
873
				k,
874
				kk,
875 876 877
				last;

	/* First, fill topoProcs[] array with the procs in their current order */
878
	proc = (PGPROC *) waitQueue->links.next;
879 880 881
	for (i = 0; i < queue_size; i++)
	{
		topoProcs[i] = proc;
882
		proc = (PGPROC *) proc->links.next;
883 884 885
	}

	/*
B
Bruce Momjian 已提交
886 887 888 889 890 891 892 893
	 * Scan the constraints, and for each proc in the array, generate a count
	 * of the number of constraints that say it must be before something else,
	 * plus a list of the constraints that say it must be after something
	 * else. The count for the j'th proc is stored in beforeConstraints[j],
	 * and the head of its list in afterConstraints[j].  Each constraint
	 * stores its list link in constraints[i].link (note any constraint will
	 * be in just one list). The array index for the before-proc of the i'th
	 * constraint is remembered in constraints[i].pred.
894 895 896 897 898 899 900 901 902 903
	 *
	 * Note that it's not necessarily the case that every constraint affects
	 * this particular wait queue.  Prior to group locking, a process could be
	 * waiting for at most one lock.  But a lock group can be waiting for
	 * zero, one, or multiple locks.  Since topoProcs[] is an array of the
	 * processes actually waiting, while constraints[] is an array of group
	 * leaders, we've got to scan through topoProcs[] for each constraint,
	 * checking whether both a waiter and a blocker for that group are
	 * present.  If so, the constraint is relevant to this wait queue; if not,
	 * it isn't.
904 905 906 907 908
	 */
	MemSet(beforeConstraints, 0, queue_size * sizeof(int));
	MemSet(afterConstraints, 0, queue_size * sizeof(int));
	for (i = 0; i < nConstraints; i++)
	{
909 910 911 912 913 914 915 916
		/*
		 * Find a representative process that is on the lock queue and part of
		 * the waiting lock group.  This may or may not be the leader, which
		 * may or may not be waiting at all.  If there are any other processes
		 * in the same lock group on the queue, set their number of
		 * beforeConstraints to -1 to indicate that they should be emitted
		 * with their groupmates rather than considered separately.
		 */
917
		proc = constraints[i].waiter;
918 919
		Assert(proc != NULL);
		jj = -1;
B
Bruce Momjian 已提交
920
		for (j = queue_size; --j >= 0;)
921
		{
922 923 924 925 926 927 928 929 930 931 932 933
			PGPROC	   *waiter = topoProcs[j];

			if (waiter == proc || waiter->lockGroupLeader == proc)
			{
				Assert(waiter->waitLock == lock);
				if (jj == -1)
					jj = j;
				else
				{
					Assert(beforeConstraints[j] <= 0);
					beforeConstraints[j] = -1;
				}
934
				break;
935
			}
936
		}
937 938 939 940 941 942 943 944 945 946

		/* If no matching waiter, constraint is not relevant to this lock. */
		if (jj < 0)
			continue;

		/*
		 * Similarly, find a representative process that is on the lock queue
		 * and waiting for the blocking lock group.  Again, this could be the
		 * leader but does not need to be.
		 */
947
		proc = constraints[i].blocker;
948 949
		Assert(proc != NULL);
		kk = -1;
B
Bruce Momjian 已提交
950
		for (k = queue_size; --k >= 0;)
951
		{
952 953 954 955 956 957 958 959 960 961 962 963 964
			PGPROC	   *blocker = topoProcs[k];

			if (blocker == proc || blocker->lockGroupLeader == proc)
			{
				Assert(blocker->waitLock == lock);
				if (kk == -1)
					kk = k;
				else
				{
					Assert(beforeConstraints[k] <= 0);
					beforeConstraints[k] = -1;
				}
			}
965
		}
966 967 968 969 970 971

		/* If no matching blocker, constraint is not relevant to this lock. */
		if (kk < 0)
			continue;

		beforeConstraints[jj]++;	/* waiter must come before */
972
		/* add this constraint to list of after-constraints for blocker */
973 974 975
		constraints[i].pred = jj;
		constraints[i].link = afterConstraints[kk];
		afterConstraints[kk] = i + 1;
976
	}
977

978
	/*--------------------
B
Bruce Momjian 已提交
979
	 * Now scan the topoProcs array backwards.  At each step, output the
980 981 982
	 * last proc that has no remaining before-constraints plus any other
	 * members of the same lock group; then decrease the beforeConstraints
	 * count of each of the procs it was constrained against.
983 984 985 986 987 988
	 * i = index of ordering[] entry we want to output this time
	 * j = search index for topoProcs[]
	 * k = temp for scanning constraint list for proc j
	 * last = last non-null index in topoProcs (avoid redundant searches)
	 *--------------------
	 */
B
Bruce Momjian 已提交
989
	last = queue_size - 1;
990
	for (i = queue_size - 1; i >= 0;)
991
	{
992 993 994
		int			c;
		int			nmatches = 0;

995 996 997 998 999 1000 1001 1002
		/* Find next candidate to output */
		while (topoProcs[last] == NULL)
			last--;
		for (j = last; j >= 0; j--)
		{
			if (topoProcs[j] != NULL && beforeConstraints[j] == 0)
				break;
		}
1003

1004 1005 1006
		/* If no available candidate, topological sort fails */
		if (j < 0)
			return false;
1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033

		/*
		 * Output everything in the lock group.  There's no point in outputing
		 * an ordering where members of the same lock group are not
		 * consecutive on the wait queue: if some other waiter is between two
		 * requests that belong to the same group, then either it conflicts
		 * with both of them and is certainly not a solution; or it conflicts
		 * with at most one of them and is thus isomorphic to an ordering
		 * where the group members are consecutive.
		 */
		proc = topoProcs[j];
		if (proc->lockGroupLeader != NULL)
			proc = proc->lockGroupLeader;
		Assert(proc != NULL);
		for (c = 0; c <= last; ++c)
		{
			if (topoProcs[c] == proc || (topoProcs[c] != NULL &&
									  topoProcs[c]->lockGroupLeader == proc))
			{
				ordering[i - nmatches] = topoProcs[c];
				topoProcs[c] = NULL;
				++nmatches;
			}
		}
		Assert(nmatches > 0);
		i -= nmatches;

1034
		/* Update beforeConstraints counts of its predecessors */
B
Bruce Momjian 已提交
1035 1036
		for (k = afterConstraints[j]; k > 0; k = constraints[k - 1].link)
			beforeConstraints[constraints[k - 1].pred]--;
1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048
	}

	/* Done */
	return true;
}

#ifdef DEBUG_DEADLOCK
/*
 * PrintLockQueue -- debugging aid: dump a lock's wait queue to stdout.
 *
 * Prints "info", the lock's address, and the pid of each waiting process
 * in current queue order.  Compiled only when DEBUG_DEADLOCK is defined.
 */
static void
PrintLockQueue(LOCK *lock, const char *info)
{
	PROC_QUEUE *waitQueue = &(lock->waitProcs);
	int			queue_size = waitQueue->size;
	PGPROC	   *proc;
	int			i;

	printf("%s lock %p queue ", info, lock);
	proc = (PGPROC *) waitQueue->links.next;
	for (i = 0; i < queue_size; i++)
	{
		printf(" %d", proc->pid);
		proc = (PGPROC *) proc->links.next;
	}
	printf("\n");
	/* flush immediately so output survives a subsequent crash */
	fflush(stdout);
}
#endif
1063

1064
/*
1065
 * Report a detected deadlock, with available details.
1066 1067 1068 1069
 */
void
DeadLockReport(void)
{
1070 1071
	StringInfoData clientbuf;	/* errdetail for client */
	StringInfoData logbuf;		/* errdetail for server log */
1072
	StringInfoData locktagbuf;
B
Bruce Momjian 已提交
1073
	int			i;
1074

1075 1076
	initStringInfo(&clientbuf);
	initStringInfo(&logbuf);
1077
	initStringInfo(&locktagbuf);
1078

1079
	/* Generate the "waits for" lines sent to the client */
1080 1081
	for (i = 0; i < nDeadlockDetails; i++)
	{
B
Bruce Momjian 已提交
1082
		DEADLOCK_INFO *info = &deadlockDetails[i];
1083 1084 1085
		int			nextpid;

		/* The last proc waits for the first one... */
B
Bruce Momjian 已提交
1086
		if (i < nDeadlockDetails - 1)
1087 1088 1089 1090
			nextpid = info[1].pid;
		else
			nextpid = deadlockDetails[0].pid;

1091 1092
		/* reset locktagbuf to hold next object description */
		resetStringInfo(&locktagbuf);
1093

1094
		DescribeLockTag(&locktagbuf, &info->locktag);
1095

1096
		if (i > 0)
1097
			appendStringInfoChar(&clientbuf, '\n');
1098

1099
		appendStringInfo(&clientbuf,
B
Bruce Momjian 已提交
1100
				  _("Process %d waits for %s on %s; blocked by process %d."),
1101
						 info->pid,
1102 1103
						 GetLockmodeName(info->locktag.locktag_lockmethodid,
										 info->lockmode),
1104
						 locktagbuf.data,
1105
						 nextpid);
1106
	}
1107

1108 1109 1110 1111 1112 1113 1114 1115 1116
	/* Duplicate all the above for the server ... */
	appendStringInfoString(&logbuf, clientbuf.data);

	/* ... and add info about query strings */
	for (i = 0; i < nDeadlockDetails; i++)
	{
		DEADLOCK_INFO *info = &deadlockDetails[i];

		appendStringInfoChar(&logbuf, '\n');
1117

1118
		appendStringInfo(&logbuf,
1119 1120
						 _("Process %d: %s"),
						 info->pid,
1121
					  pgstat_get_backend_current_activity(info->pid, false));
1122
	}
1123

1124 1125
	pgstat_report_deadlock();

1126 1127 1128
	ereport(ERROR,
			(errcode(ERRCODE_T_R_DEADLOCK_DETECTED),
			 errmsg("deadlock detected"),
1129
			 errdetail_internal("%s", clientbuf.data),
1130 1131
			 errdetail_log("%s", logbuf.data),
			 errhint("See server log for query details.")));
1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144
}

/*
 * RememberSimpleDeadLock: set up info for DeadLockReport when ProcSleep
 * detects a trivial (two-way) deadlock.  proc1 wants to block for lockmode
 * on lock, but proc2 is already waiting and would be blocked by proc1.
 *
 * Fills deadlockDetails[0..1] with the two edges of the cycle and sets
 * nDeadlockDetails so a later DeadLockReport() can describe it.
 */
void
RememberSimpleDeadLock(PGPROC *proc1,
					   LOCKMODE lockmode,
					   LOCK *lock,
					   PGPROC *proc2)
{
	DEADLOCK_INFO *info = &deadlockDetails[0];

	/* edge 1: proc1 waits for lock, blocked by proc2 */
	info->locktag = lock->tag;
	info->lockmode = lockmode;
	info->pid = proc1->pid;
	info++;
	/* edge 2: proc2 waits for its own lock, blocked by proc1 */
	info->locktag = proc2->waitLock->tag;
	info->lockmode = proc2->waitLockMode;
	info->pid = proc2->pid;
	nDeadlockDetails = 2;
}