/*-------------------------------------------------------------------------
 *
 * execMain.c
 *	  top level executor interface routines
 *
 * INTERFACE ROUTINES
 *	ExecutorStart()
 *	ExecutorRun()
 *	ExecutorEnd()
 *
 *	The old ExecutorMain() has been replaced by ExecutorStart(),
 *	ExecutorRun() and ExecutorEnd()
 *
 *	These three procedures are the external interfaces to the executor.
 *	In each case, the query descriptor is required as an argument.
 *
 *	ExecutorStart() must be called at the beginning of execution of any
 *	query plan and ExecutorEnd() should always be called at the end of
 *	execution of a plan.
 *
 *	ExecutorRun accepts direction and count arguments that specify whether
 *	the plan is to be executed forwards or backwards, and for how many tuples.
 *
 * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/backend/executor/execMain.c,v 1.261 2005/11/22 18:17:10 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
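
/*
 * Illustrative sketch only (not part of the original file): a caller such as
 * the portal code typically drives these routines as
 *
 *		ExecutorStart(queryDesc, false);
 *		(void) ExecutorRun(queryDesc, ForwardScanDirection, 0L);
 *		ExecutorEnd(queryDesc);
 *
 * where queryDesc was built beforehand with CreateQueryDesc(); the snapshot
 * and DestReceiver setup around these calls is assumed, not shown here.
 */
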
#include "postgres.h"

#include "access/heapam.h"
#include "access/xlog.h"
#include "catalog/heap.h"
#include "catalog/namespace.h"
#include "commands/tablecmds.h"
#include "commands/trigger.h"
#include "executor/execdebug.h"
#include "executor/execdefs.h"
#include "executor/instrument.h"
#include "miscadmin.h"
#include "optimizer/clauses.h"
#include "optimizer/var.h"
#include "parser/parsetree.h"
#include "storage/smgr.h"
#include "utils/acl.h"
#include "utils/guc.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"


typedef struct execRowMark
{
	Relation	relation;
	Index		rti;
	char		resname[32];
} execRowMark;

typedef struct evalPlanQual
{
	Index		rti;
	EState	   *estate;
	PlanState  *planstate;
	struct evalPlanQual *next;	/* stack of active PlanQual plans */
	struct evalPlanQual *free;	/* list of free PlanQual plans */
} evalPlanQual;

/* decls for local routines only used within this module */
static void InitPlan(QueryDesc *queryDesc, bool explainOnly);
static void initResultRelInfo(ResultRelInfo *resultRelInfo,
				  Index resultRelationIndex,
				  List *rangeTable,
				  CmdType operation,
				  bool doInstrument);
static TupleTableSlot *ExecutePlan(EState *estate, PlanState *planstate,
			CmdType operation,
			long numberTuples,
			ScanDirection direction,
			DestReceiver *dest);
static void ExecSelect(TupleTableSlot *slot,
		   DestReceiver *dest,
		   EState *estate);
static void ExecInsert(TupleTableSlot *slot, ItemPointer tupleid,
		   EState *estate);
static void ExecDelete(TupleTableSlot *slot, ItemPointer tupleid,
		   EState *estate);
static void ExecUpdate(TupleTableSlot *slot, ItemPointer tupleid,
		   EState *estate);
static TupleTableSlot *EvalPlanQualNext(EState *estate);
static void EndEvalPlanQual(EState *estate);
static void ExecCheckRTEPerms(RangeTblEntry *rte);
static void ExecCheckXactReadOnly(Query *parsetree);
static void EvalPlanQualStart(evalPlanQual *epq, EState *estate,
				  evalPlanQual *priorepq);
static void EvalPlanQualStop(evalPlanQual *epq);

/* end of local decls */


/* ----------------------------------------------------------------
 *		ExecutorStart
 *
 *		This routine must be called at the beginning of any execution of any
 *		query plan
 *
 * Takes a QueryDesc previously created by CreateQueryDesc (it's not real
 * clear why we bother to separate the two functions, but...).	The tupDesc
 * field of the QueryDesc is filled in to describe the tuples that will be
 * returned, and the internal fields (estate and planstate) are set up.
 *
 * If explainOnly is true, we are not actually intending to run the plan,
 * only to set up for EXPLAIN; so skip unwanted side-effects.
 *
 * NB: the CurrentMemoryContext when this is called will become the parent
 * of the per-query context used for this Executor invocation.
 * ----------------------------------------------------------------
 */
void
ExecutorStart(QueryDesc *queryDesc, bool explainOnly)
{
	EState	   *estate;
	MemoryContext oldcontext;

	/* sanity checks: queryDesc must not be started already */
	Assert(queryDesc != NULL);
	Assert(queryDesc->estate == NULL);

	/*
	 * If the transaction is read-only, we need to check if any writes are
	 * planned to non-temporary tables.
	 */
	if (XactReadOnly && !explainOnly)
		ExecCheckXactReadOnly(queryDesc->parsetree);

	/*
	 * Build EState, switch into per-query memory context for startup.
	 */
	estate = CreateExecutorState();
	queryDesc->estate = estate;

	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	/*
	 * Fill in parameters, if any, from queryDesc
	 */
	estate->es_param_list_info = queryDesc->params;

	if (queryDesc->plantree->nParamExec > 0)
		estate->es_param_exec_vals = (ParamExecData *)
			palloc0(queryDesc->plantree->nParamExec * sizeof(ParamExecData));

	/*
	 * Copy other important information into the EState
	 */
	estate->es_snapshot = queryDesc->snapshot;
	estate->es_crosscheck_snapshot = queryDesc->crosscheck_snapshot;
	estate->es_instrument = queryDesc->doInstrument;

	/*
	 * Initialize the plan state tree
	 */
	InitPlan(queryDesc, explainOnly);

	MemoryContextSwitchTo(oldcontext);
}

/* ----------------------------------------------------------------
 *		ExecutorRun
 *
 *		This is the main routine of the executor module. It accepts
 *		the query descriptor from the traffic cop and executes the
 *		query plan.
 *
 *		ExecutorStart must have been called already.
 *
 *		If direction is NoMovementScanDirection then nothing is done
 *		except to start up/shut down the destination.  Otherwise,
 *		we retrieve up to 'count' tuples in the specified direction.
 *
 *		Note: count = 0 is interpreted as no portal limit, i.e., run to
 *		completion.
 *
 * ----------------------------------------------------------------
 */
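
/*
 * Illustrative only: a caller fetching at most 100 tuples forward would use
 *
 *		result = ExecutorRun(queryDesc, ForwardScanDirection, 100L);
 *
 * whereas count = 0 runs the plan to completion, as noted above.
 */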
TupleTableSlot *
ExecutorRun(QueryDesc *queryDesc,
			ScanDirection direction, long count)
{
	EState	   *estate;
	CmdType		operation;
	DestReceiver *dest;
	TupleTableSlot *result;
	MemoryContext oldcontext;

	/* sanity checks */
	Assert(queryDesc != NULL);

	estate = queryDesc->estate;

	Assert(estate != NULL);

	/*
	 * Switch into per-query memory context
	 */
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	/*
	 * extract information from the query descriptor and the query feature.
	 */
	operation = queryDesc->operation;
	dest = queryDesc->dest;

	/*
	 * startup tuple receiver
	 */
	estate->es_processed = 0;
	estate->es_lastoid = InvalidOid;

	(*dest->rStartup) (dest, operation, queryDesc->tupDesc);

	/*
	 * run plan
	 */
	if (direction == NoMovementScanDirection)
		result = NULL;
	else
		result = ExecutePlan(estate,
							 queryDesc->planstate,
							 operation,
							 count,
							 direction,
							 dest);

	/*
	 * shutdown receiver
	 */
	(*dest->rShutdown) (dest);

	MemoryContextSwitchTo(oldcontext);

	return result;
}

/* ----------------------------------------------------------------
 *		ExecutorEnd
 *
 *		This routine must be called at the end of execution of any
 *		query plan
 * ----------------------------------------------------------------
 */
void
ExecutorEnd(QueryDesc *queryDesc)
{
	EState	   *estate;
	MemoryContext oldcontext;

	/* sanity checks */
	Assert(queryDesc != NULL);

	estate = queryDesc->estate;

	Assert(estate != NULL);

	/*
	 * Switch into per-query memory context to run ExecEndPlan
	 */
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	ExecEndPlan(queryDesc->planstate, estate);

	/*
	 * Must switch out of context before destroying it
	 */
	MemoryContextSwitchTo(oldcontext);

	/*
	 * Release EState and per-query memory context.  This should release
	 * everything the executor has allocated.
	 */
	FreeExecutorState(estate);

	/* Reset queryDesc fields that no longer point to anything */
	queryDesc->tupDesc = NULL;
	queryDesc->estate = NULL;
	queryDesc->planstate = NULL;
}

/* ----------------------------------------------------------------
 *		ExecutorRewind
 *
 *		This routine may be called on an open queryDesc to rewind it
 *		to the start.
 * ----------------------------------------------------------------
 */
void
ExecutorRewind(QueryDesc *queryDesc)
{
	EState	   *estate;
	MemoryContext oldcontext;

	/* sanity checks */
	Assert(queryDesc != NULL);

	estate = queryDesc->estate;

	Assert(estate != NULL);

	/* It's probably not sensible to rescan updating queries */
	Assert(queryDesc->operation == CMD_SELECT);

	/*
	 * Switch into per-query memory context
	 */
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	/*
	 * rescan plan
	 */
	ExecReScan(queryDesc->planstate, NULL);

	MemoryContextSwitchTo(oldcontext);
}


/*
 * ExecCheckRTPerms
 *		Check access permissions for all relations listed in a range table.
 */
void
ExecCheckRTPerms(List *rangeTable)
{
	ListCell   *l;

	foreach(l, rangeTable)
	{
		RangeTblEntry *rte = lfirst(l);

		ExecCheckRTEPerms(rte);
	}
}

/*
 * ExecCheckRTEPerms
 *		Check access permissions for a single RTE.
 */
static void
ExecCheckRTEPerms(RangeTblEntry *rte)
{
	AclMode		requiredPerms;
	Oid			relOid;
	Oid			userid;

	/*
	 * Only plain-relation RTEs need to be checked here.  Subquery RTEs are
	 * checked by ExecInitSubqueryScan if the subquery is still a separate
	 * subquery --- if it's been pulled up into our query level then the RTEs
	 * are in our rangetable and will be checked here. Function RTEs are
	 * checked by init_fcache when the function is prepared for execution.
	 * Join and special RTEs need no checks.
	 */
	if (rte->rtekind != RTE_RELATION)
		return;

	/*
	 * No work if requiredPerms is empty.
	 */
	requiredPerms = rte->requiredPerms;
	if (requiredPerms == 0)
		return;

	relOid = rte->relid;

	/*
	 * userid to check as: current user unless we have a setuid indication.
	 *
	 * Note: GetUserId() is presently fast enough that there's no harm in
	 * calling it separately for each RTE.	If that stops being true, we could
	 * call it once in ExecCheckRTPerms and pass the userid down from there.
	 * But for now, no need for the extra clutter.
	 */
	userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();

	/*
	 * We must have *all* the requiredPerms bits, so use aclmask not aclcheck.
	 */
	if (pg_class_aclmask(relOid, userid, requiredPerms, ACLMASK_ALL)
		!= requiredPerms)
		aclcheck_error(ACLCHECK_NO_PRIV, ACL_KIND_CLASS,
					   get_rel_name(relOid));
}

/*
 * Check that the query does not imply any writes to non-temp tables.
 */
static void
ExecCheckXactReadOnly(Query *parsetree)
{
	ListCell   *l;

	/*
	 * CREATE TABLE AS or SELECT INTO?
	 *
	 * XXX should we allow this if the destination is temp?
	 */
	if (parsetree->into != NULL)
		goto fail;

	/* Fail if write permissions are requested on any non-temp table */
	foreach(l, parsetree->rtable)
	{
		RangeTblEntry *rte = lfirst(l);

		if (rte->rtekind == RTE_SUBQUERY)
		{
			ExecCheckXactReadOnly(rte->subquery);
			continue;
		}

		if (rte->rtekind != RTE_RELATION)
			continue;

		if ((rte->requiredPerms & (~ACL_SELECT)) == 0)
			continue;

		if (isTempNamespace(get_rel_namespace(rte->relid)))
			continue;

		goto fail;
	}

	return;

fail:
	ereport(ERROR,
			(errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION),
			 errmsg("transaction is read-only")));
}


/* ----------------------------------------------------------------
 *		InitPlan
 *
 *		Initializes the query plan: open files, allocate storage
 *		and start up the rule manager
 * ----------------------------------------------------------------
 */
static void
InitPlan(QueryDesc *queryDesc, bool explainOnly)
{
	CmdType		operation = queryDesc->operation;
	Query	   *parseTree = queryDesc->parsetree;
	Plan	   *plan = queryDesc->plantree;
	EState	   *estate = queryDesc->estate;
	PlanState  *planstate;
	List	   *rangeTable;
	Relation	intoRelationDesc;
	bool		do_select_into;
	TupleDesc	tupType;

	/*
	 * Do permissions checks.  It's sufficient to examine the query's top
	 * rangetable here --- subplan RTEs will be checked during
	 * ExecInitSubPlan().
	 */
	ExecCheckRTPerms(parseTree->rtable);

	/*
	 * get information from query descriptor
	 */
	rangeTable = parseTree->rtable;

	/*
	 * initialize the node's execution state
	 */
	estate->es_range_table = rangeTable;

	/*
	 * if there is a result relation, initialize result relation stuff
	 */
	if (parseTree->resultRelation != 0 && operation != CMD_SELECT)
	{
		List	   *resultRelations = parseTree->resultRelations;
		int			numResultRelations;
		ResultRelInfo *resultRelInfos;


489 490 491 492 493 494
		if (resultRelations != NIL)
		{
			/*
			 * Multiple result relations (due to inheritance)
			 * parseTree->resultRelations identifies them all
			 */
B
Bruce Momjian 已提交
495 496
			ResultRelInfo *resultRelInfo;
			ListCell   *l;
497

498
			numResultRelations = list_length(resultRelations);
499 500 501
			resultRelInfos = (ResultRelInfo *)
				palloc(numResultRelations * sizeof(ResultRelInfo));
			resultRelInfo = resultRelInfos;
502
			foreach(l, resultRelations)
503 504
			{
				initResultRelInfo(resultRelInfo,
505
								  lfirst_int(l),
506
								  rangeTable,
507 508
								  operation,
								  estate->es_instrument);
509 510 511 512 513 514
				resultRelInfo++;
			}
		}
		else
		{
			/*
B
Bruce Momjian 已提交
515
			 * Single result relation identified by parseTree->resultRelation
516 517 518 519 520 521
			 */
			numResultRelations = 1;
			resultRelInfos = (ResultRelInfo *) palloc(sizeof(ResultRelInfo));
			initResultRelInfo(resultRelInfos,
							  parseTree->resultRelation,
							  rangeTable,
522 523
							  operation,
							  estate->es_instrument);
524
		}
525

526 527 528 529
		estate->es_result_relations = resultRelInfos;
		estate->es_num_result_relations = numResultRelations;
		/* Initialize to first or only result rel */
		estate->es_result_relation_info = resultRelInfos;
530
	}
531 532
	else
	{
B
Bruce Momjian 已提交
533
		/*
B
Bruce Momjian 已提交
534
		 * if no result relation, then set state appropriately
535
		 */
536 537
		estate->es_result_relations = NULL;
		estate->es_num_result_relations = 0;
538 539 540
		estate->es_result_relation_info = NULL;
	}

541
	/*
T
Tom Lane 已提交
542
	 * Detect whether we're doing SELECT INTO.  If so, set the es_into_oids
B
Bruce Momjian 已提交
543 544
	 * flag appropriately so that the plan tree will be initialized with the
	 * correct tuple descriptors.
545 546 547
	 */
	do_select_into = false;

548
	if (operation == CMD_SELECT && parseTree->into != NULL)
549 550
	{
		do_select_into = true;
551 552
		estate->es_select_into = true;
		estate->es_into_oids = parseTree->intoHasOids;
553 554
	}

555
	/*
556
	 * Have to lock relations selected FOR UPDATE/FOR SHARE
557
	 */
558
	estate->es_rowMarks = NIL;
559
	estate->es_forUpdate = parseTree->forUpdate;
560
	estate->es_rowNoWait = parseTree->rowNoWait;
561
	if (parseTree->rowMarks != NIL)
562
	{
563
		ListCell   *l;
564

565
		foreach(l, parseTree->rowMarks)
566
		{
567
			Index		rti = lfirst_int(l);
568
			Oid			relid = getrelid(rti, rangeTable);
569 570 571 572
			Relation	relation;
			execRowMark *erm;

			relation = heap_open(relid, RowShareLock);
B
Bruce Momjian 已提交
573
			erm = (execRowMark *) palloc(sizeof(execRowMark));
574
			erm->relation = relation;
575
			erm->rti = rti;
576
			snprintf(erm->resname, sizeof(erm->resname), "ctid%u", rti);
577
			estate->es_rowMarks = lappend(estate->es_rowMarks, erm);
578 579
		}
	}
580

B
Bruce Momjian 已提交
581
	/*
B
Bruce Momjian 已提交
582 583 584
	 * initialize the executor "tuple" table.  We need slots for all the plan
	 * nodes, plus possibly output slots for the junkfilter(s). At this point
	 * we aren't sure if we need junkfilters, so just add slots for them
585 586
	 * unconditionally.  Also, if it's not a SELECT, set up a slot for use for
	 * trigger output tuples.
587 588
	 */
	{
589
		int			nSlots = ExecCountSlotsNode(plan);
590

591
		if (parseTree->resultRelations != NIL)
592
			nSlots += list_length(parseTree->resultRelations);
593 594
		else
			nSlots += 1;
595 596 597
		if (operation != CMD_SELECT)
			nSlots++;

598
		estate->es_tupleTable = ExecCreateTupleTable(nSlots);
599 600 601 602

		if (operation != CMD_SELECT)
			estate->es_trig_tuple_slot =
				ExecAllocTableSlot(estate->es_tupleTable);
603
	}
604

605
	/* mark EvalPlanQual not active */
606
	estate->es_topPlan = plan;
607 608
	estate->es_evalPlanQual = NULL;
	estate->es_evTupleNull = NULL;
609
	estate->es_evTuple = NULL;
610 611
	estate->es_useEvalPlan = false;

B
Bruce Momjian 已提交
612
	/*
B
Bruce Momjian 已提交
613 614 615
	 * initialize the private state information for all the nodes in the query
	 * tree.  This opens files, allocates storage and leaves us ready to start
	 * processing tuples.
616
	 */
617
	planstate = ExecInitNode(plan, estate);
618

B
Bruce Momjian 已提交
619
	/*
B
Bruce Momjian 已提交
620 621 622
	 * Get the tuple descriptor describing the type of tuples to return. (this
	 * is especially important if we are creating a relation with "SELECT
	 * INTO")
623
	 */
624
	tupType = ExecGetResultType(planstate);
625

B
Bruce Momjian 已提交
626
	/*
B
Bruce Momjian 已提交
627 628 629 630 631 632
	 * Initialize the junk filter if needed.  SELECT and INSERT queries need a
	 * filter if there are any junk attrs in the tlist.  INSERT and SELECT
	 * INTO also need a filter if the plan may return raw disk tuples (else
	 * heap_insert will be scribbling on the source relation!). UPDATE and
	 * DELETE always need a filter, since there's always a junk 'ctid'
	 * attribute present --- no need to look first.
633 634
	 */
	{
635
		bool		junk_filter_needed = false;
636
		ListCell   *tlist;
637

638
		switch (operation)
639
		{
640 641
			case CMD_SELECT:
			case CMD_INSERT:
642
				foreach(tlist, plan->targetlist)
643
				{
644 645
					TargetEntry *tle = (TargetEntry *) lfirst(tlist);

646
					if (tle->resjunk)
647 648 649 650
					{
						junk_filter_needed = true;
						break;
					}
651
				}
652
				if (!junk_filter_needed &&
653 654 655
					(operation == CMD_INSERT || do_select_into) &&
					ExecMayReturnRawTuples(planstate))
					junk_filter_needed = true;
				break;
			case CMD_UPDATE:
			case CMD_DELETE:
				junk_filter_needed = true;
				break;
			default:
				break;
		}

		if (junk_filter_needed)
		{
			/*
			 * If there are multiple result relations, each one needs its own
			 * junk filter.  Note this is only possible for UPDATE/DELETE, so
			 * we can't be fooled by some needing a filter and some not.
			 */
			if (parseTree->resultRelations != NIL)
			{
				PlanState **appendplans;
				int			as_nplans;
				ResultRelInfo *resultRelInfo;
				int			i;

				/* Top plan had better be an Append here. */
				Assert(IsA(plan, Append));
				Assert(((Append *) plan)->isTarget);
				Assert(IsA(planstate, AppendState));
				appendplans = ((AppendState *) planstate)->appendplans;
				as_nplans = ((AppendState *) planstate)->as_nplans;
				Assert(as_nplans == estate->es_num_result_relations);
				resultRelInfo = estate->es_result_relations;
				for (i = 0; i < as_nplans; i++)
				{
					PlanState  *subplan = appendplans[i];
					JunkFilter *j;

					j = ExecInitJunkFilter(subplan->plan->targetlist,
							resultRelInfo->ri_RelationDesc->rd_att->tdhasoid,
								  ExecAllocTableSlot(estate->es_tupleTable));
					resultRelInfo->ri_junkFilter = j;
					resultRelInfo++;
				}

				/*
				 * Set active junkfilter too; at this point ExecInitAppend has
				 * already selected an active result relation...
				 */
				estate->es_junkFilter =
					estate->es_result_relation_info->ri_junkFilter;
			}
			else
			{
				/* Normal case with just one JunkFilter */
				JunkFilter *j;

				j = ExecInitJunkFilter(planstate->plan->targetlist,
									   tupType->tdhasoid,
								  ExecAllocTableSlot(estate->es_tupleTable));
				estate->es_junkFilter = j;
				if (estate->es_result_relation_info)
					estate->es_result_relation_info->ri_junkFilter = j;

				/* For SELECT, want to return the cleaned tuple type */
				if (operation == CMD_SELECT)
					tupType = j->jf_cleanTupType;
			}
		}
		else
			estate->es_junkFilter = NULL;
	}

	/*
	 * If doing SELECT INTO, initialize the "into" relation.  We must wait
	 * till now so we have the "clean" result tuple type to create the new
	 * table from.
	 *
	 * If EXPLAIN, skip creating the "into" relation.
	 */
	intoRelationDesc = NULL;

	if (do_select_into && !explainOnly)
	{
		char	   *intoName;
		Oid			namespaceId;
		AclResult	aclresult;
		Oid			intoRelationId;
		TupleDesc	tupdesc;

		/*
		 * find namespace to create in, check permissions
		 */
		intoName = parseTree->into->relname;
		namespaceId = RangeVarGetCreationNamespace(parseTree->into);

		aclresult = pg_namespace_aclcheck(namespaceId, GetUserId(),
										  ACL_CREATE);
		if (aclresult != ACLCHECK_OK)
			aclcheck_error(aclresult, ACL_KIND_NAMESPACE,
						   get_namespace_name(namespaceId));

		/*
		 * have to copy tupType to get rid of constraints
		 */
		tupdesc = CreateTupleDescCopy(tupType);

		intoRelationId = heap_create_with_catalog(intoName,
												  namespaceId,
												  InvalidOid,
												  InvalidOid,
												  GetUserId(),
												  tupdesc,
												  RELKIND_RELATION,
												  false,
												  true,
												  0,
												  ONCOMMIT_NOOP,
												  allowSystemTableMods);

		FreeTupleDesc(tupdesc);

		/*
		 * Advance command counter so that the newly-created relation's
		 * catalog tuples will be visible to heap_open.
		 */
		CommandCounterIncrement();

		/*
		 * If necessary, create a TOAST table for the into relation. Note that
		 * AlterTableCreateToastTable ends with CommandCounterIncrement(), so
		 * that the TOAST table will be visible for insertion.
		 */
		AlterTableCreateToastTable(intoRelationId, true);

		/*
		 * And open the constructed table for writing.
		 */
		intoRelationDesc = heap_open(intoRelationId, AccessExclusiveLock);

		/* use_wal off requires rd_targblock be initially invalid */
		Assert(intoRelationDesc->rd_targblock == InvalidBlockNumber);

		/*
		 * We can skip WAL-logging the insertions, unless PITR is in use.
		 *
		 * Note that for a non-temp INTO table, this is safe only because we
		 * know that the catalog changes above will have been WAL-logged, and
		 * so RecordTransactionCommit will think it needs to WAL-log the
		 * eventual transaction commit.  Else the commit might be lost, even
		 * though all the data is safely fsync'd ...
		 */
		estate->es_into_relation_use_wal = XLogArchivingActive();
	}

	estate->es_into_relation_descriptor = intoRelationDesc;

	queryDesc->tupDesc = tupType;
	queryDesc->planstate = planstate;
}

/*
 * Initialize ResultRelInfo data for one result relation
 */
static void
initResultRelInfo(ResultRelInfo *resultRelInfo,
				  Index resultRelationIndex,
				  List *rangeTable,
				  CmdType operation,
				  bool doInstrument)
{
	Oid			resultRelationOid;
	Relation	resultRelationDesc;

	resultRelationOid = getrelid(resultRelationIndex, rangeTable);
	resultRelationDesc = heap_open(resultRelationOid, RowExclusiveLock);

	switch (resultRelationDesc->rd_rel->relkind)
	{
		case RELKIND_SEQUENCE:
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot change sequence \"%s\"",
							RelationGetRelationName(resultRelationDesc))));
			break;
		case RELKIND_TOASTVALUE:
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot change TOAST relation \"%s\"",
							RelationGetRelationName(resultRelationDesc))));
			break;
		case RELKIND_VIEW:
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot change view \"%s\"",
							RelationGetRelationName(resultRelationDesc))));
			break;
	}

	MemSet(resultRelInfo, 0, sizeof(ResultRelInfo));
	resultRelInfo->type = T_ResultRelInfo;
	resultRelInfo->ri_RangeTableIndex = resultRelationIndex;
	resultRelInfo->ri_RelationDesc = resultRelationDesc;
	resultRelInfo->ri_NumIndices = 0;
	resultRelInfo->ri_IndexRelationDescs = NULL;
	resultRelInfo->ri_IndexRelationInfo = NULL;
	/* make a copy so as not to depend on relcache info not changing... */
	resultRelInfo->ri_TrigDesc = CopyTriggerDesc(resultRelationDesc->trigdesc);
	if (resultRelInfo->ri_TrigDesc)
	{
		int			n = resultRelInfo->ri_TrigDesc->numtriggers;

		resultRelInfo->ri_TrigFunctions = (FmgrInfo *)
			palloc0(n * sizeof(FmgrInfo));
		if (doInstrument)
			resultRelInfo->ri_TrigInstrument = InstrAlloc(n);
		else
			resultRelInfo->ri_TrigInstrument = NULL;
	}
	else
	{
		resultRelInfo->ri_TrigFunctions = NULL;
		resultRelInfo->ri_TrigInstrument = NULL;
	}
	resultRelInfo->ri_ConstraintExprs = NULL;
	resultRelInfo->ri_junkFilter = NULL;

	/*
	 * If there are indices on the result relation, open them and save
	 * descriptors in the result relation info, so that we can add new index
	 * entries for the tuples we add/update.  We need not do this for a
	 * DELETE, however, since deletion doesn't affect indexes.
	 */
	if (resultRelationDesc->rd_rel->relhasindex &&
		operation != CMD_DELETE)
		ExecOpenIndices(resultRelInfo);
}

/*
 *		ExecContextForcesOids
 *
 * This is pretty grotty: when doing INSERT, UPDATE, or SELECT INTO,
 * we need to ensure that result tuples have space for an OID iff they are
 * going to be stored into a relation that has OIDs.  In other contexts
 * we are free to choose whether to leave space for OIDs in result tuples
 * (we generally don't want to, but we do if a physical-tlist optimization
 * is possible).  This routine checks the plan context and returns TRUE if the
 * choice is forced, FALSE if the choice is not forced.  In the TRUE case,
 * *hasoids is set to the required value.
 *
 * One reason this is ugly is that all plan nodes in the plan tree will emit
 * tuples with space for an OID, though we really only need the topmost node
 * to do so.  However, node types like Sort don't project new tuples but just
 * return their inputs, and in those cases the requirement propagates down
 * to the input node.  Eventually we might make this code smart enough to
 * recognize how far down the requirement really goes, but for now we just
 * make all plan nodes do the same thing if the top level forces the choice.
 *
 * We assume that estate->es_result_relation_info is already set up to
 * describe the target relation.  Note that in an UPDATE that spans an
 * inheritance tree, some of the target relations may have OIDs and some not.
 * We have to make the decisions on a per-relation basis as we initialize
 * each of the child plans of the topmost Append plan.
 *
 * SELECT INTO is even uglier, because we don't have the INTO relation's
 * descriptor available when this code runs; we have to look aside at a
 * flag set by InitPlan().
 */
bool
ExecContextForcesOids(PlanState *planstate, bool *hasoids)
{
	if (planstate->state->es_select_into)
	{
		*hasoids = planstate->state->es_into_oids;
		return true;
	}
	else
	{
		ResultRelInfo *ri = planstate->state->es_result_relation_info;

		if (ri != NULL)
		{
			Relation	rel = ri->ri_RelationDesc;

			if (rel != NULL)
			{
				*hasoids = rel->rd_rel->relhasoids;
				return true;
			}
		}
	}

	return false;
}

/* ----------------------------------------------------------------
 *		ExecEndPlan
 *
 *		Cleans up the query plan -- closes files and frees up storage
 *
 * NOTE: we are no longer very worried about freeing storage per se
 * in this code; FreeExecutorState should be guaranteed to release all
 * memory that needs to be released.  What we are worried about doing
 * is closing relations and dropping buffer pins.  Thus, for example,
 * tuple tables must be cleared or dropped to ensure pins are released.
 * ----------------------------------------------------------------
 */
void
ExecEndPlan(PlanState *planstate, EState *estate)
{
	ResultRelInfo *resultRelInfo;
	int			i;
	ListCell   *l;

	/*
	 * shut down any PlanQual processing we were doing
	 */
	if (estate->es_evalPlanQual != NULL)
		EndEvalPlanQual(estate);

	/*
	 * shut down the node-type-specific query processing
	 */
	ExecEndNode(planstate);

	/*
	 * destroy the executor "tuple" table.
	 */
	ExecDropTupleTable(estate->es_tupleTable, true);
	estate->es_tupleTable = NULL;

	/*
	 * close the result relation(s) if any, but hold locks until xact commit.
	 */
	resultRelInfo = estate->es_result_relations;
	for (i = estate->es_num_result_relations; i > 0; i--)
	{
		/* Close indices and then the relation itself */
		ExecCloseIndices(resultRelInfo);
		heap_close(resultRelInfo->ri_RelationDesc, NoLock);
		resultRelInfo++;
	}

	/*
	 * close the "into" relation if necessary, again keeping lock
	 */
	if (estate->es_into_relation_descriptor != NULL)
	{
		/*
		 * If we skipped using WAL, and it's not a temp relation, we must
		 * force the relation down to disk before it's safe to commit the
		 * transaction.  This requires forcing out any dirty buffers and then
		 * doing a forced fsync.
		 */
		if (!estate->es_into_relation_use_wal &&
			!estate->es_into_relation_descriptor->rd_istemp)
		{
			FlushRelationBuffers(estate->es_into_relation_descriptor);
			smgrimmedsync(estate->es_into_relation_descriptor->rd_smgr);
		}

		heap_close(estate->es_into_relation_descriptor, NoLock);
	}

	/*
	 * close any relations selected FOR UPDATE/FOR SHARE, again keeping locks
	 */
	foreach(l, estate->es_rowMarks)
	{
		execRowMark *erm = lfirst(l);

		heap_close(erm->relation, NoLock);
	}
}

/* ----------------------------------------------------------------
 *		ExecutePlan
 *
 *		processes the query plan to retrieve 'numberTuples' tuples in the
 *		direction specified.
 *
 *		Retrieves all tuples if numberTuples is 0
 *
 *		result is either a slot containing the last tuple in the case
 *		of a SELECT or NULL otherwise.
 *
 * Note: the ctid attribute is a 'junk' attribute that is removed before the
 * user can see it
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
ExecutePlan(EState *estate,
			PlanState *planstate,
			CmdType operation,
			long numberTuples,
			ScanDirection direction,
			DestReceiver *dest)
{
	JunkFilter *junkfilter;
	TupleTableSlot *slot;
	ItemPointer tupleid = NULL;
	ItemPointerData tuple_ctid;
	long		current_tuple_count;
	TupleTableSlot *result;

	/*
	 * initialize local variables
	 */
	slot = NULL;
	current_tuple_count = 0;
	result = NULL;

	/*
	 * Set the direction.
	 */
	estate->es_direction = direction;

	/*
	 * Process BEFORE EACH STATEMENT triggers
	 */
	switch (operation)
	{
		case CMD_UPDATE:
			ExecBSUpdateTriggers(estate, estate->es_result_relation_info);
			break;
		case CMD_DELETE:
			ExecBSDeleteTriggers(estate, estate->es_result_relation_info);
			break;
		case CMD_INSERT:
			ExecBSInsertTriggers(estate, estate->es_result_relation_info);
			break;
		default:
			/* do nothing */
			break;
	}

	/*
	 * Loop until we've processed the proper number of tuples from the plan.
	 */

	for (;;)
	{
		/* Reset the per-output-tuple exprcontext */
		ResetPerTupleExprContext(estate);

		/*
		 * Execute the plan and obtain a tuple
		 */
lnext:	;
		if (estate->es_useEvalPlan)
		{
			slot = EvalPlanQualNext(estate);
			if (TupIsNull(slot))
				slot = ExecProcNode(planstate);
		}
		else
			slot = ExecProcNode(planstate);

		/*
		 * if the tuple is null, then we assume there is nothing more to
		 * process so we just return null...
		 */
		if (TupIsNull(slot))
		{
			result = NULL;
			break;
		}

		/*
		 * if we have a junk filter, then project a new tuple with the junk
		 * removed.
		 *
		 * Store this new "clean" tuple in the junkfilter's resultSlot.
		 * (Formerly, we stored it back over the "dirty" tuple, which is WRONG
		 * because that tuple slot has the wrong descriptor.)
		 *
		 * Also, extract all the junk information we need.
		 */
		if ((junkfilter = estate->es_junkFilter) != NULL)
		{
			Datum		datum;
			bool		isNull;

			/*
			 * extract the 'ctid' junk attribute.
			 */
			if (operation == CMD_UPDATE || operation == CMD_DELETE)
			{
				if (!ExecGetJunkAttribute(junkfilter,
										  slot,
										  "ctid",
										  &datum,
										  &isNull))
					elog(ERROR, "could not find junk ctid column");

				/* shouldn't ever get a null result... */
				if (isNull)
					elog(ERROR, "ctid is NULL");

				tupleid = (ItemPointer) DatumGetPointer(datum);
				tuple_ctid = *tupleid;	/* make sure we don't free the ctid!! */
				tupleid = &tuple_ctid;
			}

			/*
			 * Process any FOR UPDATE or FOR SHARE locking requested.
			 */
			else if (estate->es_rowMarks != NIL)
			{
				ListCell   *l;

		lmark:	;
				foreach(l, estate->es_rowMarks)
				{
					execRowMark *erm = lfirst(l);
					HeapTupleData tuple;
					Buffer		buffer;
					ItemPointerData update_ctid;
					TransactionId update_xmax;
					TupleTableSlot *newSlot;
					LockTupleMode lockmode;
					HTSU_Result test;

					if (!ExecGetJunkAttribute(junkfilter,
											  slot,
											  erm->resname,
											  &datum,
											  &isNull))
						elog(ERROR, "could not find junk \"%s\" column",
							 erm->resname);

					/* shouldn't ever get a null result... */
					if (isNull)
						elog(ERROR, "\"%s\" is NULL", erm->resname);

					tuple.t_self = *((ItemPointer) DatumGetPointer(datum));

					if (estate->es_forUpdate)
						lockmode = LockTupleExclusive;
					else
						lockmode = LockTupleShared;

					test = heap_lock_tuple(erm->relation, &tuple, &buffer,
										   &update_ctid, &update_xmax,
										   estate->es_snapshot->curcid,
										   lockmode, estate->es_rowNoWait);
					ReleaseBuffer(buffer);
					switch (test)
					{
						case HeapTupleSelfUpdated:
							/* treat it as deleted; do not process */
							goto lnext;

						case HeapTupleMayBeUpdated:
							break;

						case HeapTupleUpdated:
							if (IsXactIsoLevelSerializable)
								ereport(ERROR,
								 (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
								  errmsg("could not serialize access due to concurrent update")));
							if (!ItemPointerEquals(&update_ctid,
												   &tuple.t_self))
							{
								/* updated, so look at updated version */
								newSlot = EvalPlanQual(estate,
													   erm->rti,
													   &update_ctid,
													   update_xmax);
								if (!TupIsNull(newSlot))
								{
									slot = newSlot;
									estate->es_useEvalPlan = true;
									goto lmark;
								}
							}

							/*
							 * if tuple was deleted or PlanQual failed for
							 * updated tuple - we must not return this tuple!
							 */
							goto lnext;

						default:
							elog(ERROR, "unrecognized heap_lock_tuple status: %u",
								 test);
							return (NULL);
					}
				}
			}

			/*
			 * Finally create a new "clean" tuple with all junk attributes
			 * removed
			 */
			slot = ExecFilterJunk(junkfilter, slot);
		}

		/*
		 * now that we have a tuple, do the appropriate thing with it.. either
		 * return it to the user, add it to a relation someplace, delete it
		 * from a relation, or modify some of its attributes.
		 */
		switch (operation)
		{
			case CMD_SELECT:
				ExecSelect(slot,	/* slot containing tuple */
						   dest,	/* destination's tuple-receiver obj */
						   estate);
				result = slot;
				break;

			case CMD_INSERT:
				ExecInsert(slot, tupleid, estate);
				result = NULL;
				break;

			case CMD_DELETE:
				ExecDelete(slot, tupleid, estate);
				result = NULL;
				break;

			case CMD_UPDATE:
				ExecUpdate(slot, tupleid, estate);
				result = NULL;
				break;

			default:
				elog(ERROR, "unrecognized operation code: %d",
					 (int) operation);
				result = NULL;
				break;
		}

		/*
		 * check our tuple count.. if we've processed the proper number then
		 * quit, else loop again and process more tuples.  Zero numberTuples
		 * means no limit.
		 */
		current_tuple_count++;
		if (numberTuples && numberTuples == current_tuple_count)
			break;
	}

	/*
	 * Process AFTER EACH STATEMENT triggers
	 */
	switch (operation)
	{
		case CMD_UPDATE:
			ExecASUpdateTriggers(estate, estate->es_result_relation_info);
			break;
		case CMD_DELETE:
			ExecASDeleteTriggers(estate, estate->es_result_relation_info);
			break;
		case CMD_INSERT:
			ExecASInsertTriggers(estate, estate->es_result_relation_info);
			break;
		default:
			/* do nothing */
			break;
	}

	/*
	 * here, result is either a slot containing a tuple in the case of a
	 * SELECT or NULL otherwise.
	 */
	return result;
}

/* ----------------------------------------------------------------
 *		ExecSelect
 *
 *		SELECTs are easy.. we just pass the tuple to the appropriate
 *		print function.  The only complexity is when we do a
 *		"SELECT INTO", in which case we insert the tuple into
 *		the appropriate relation (note: this is a newly created relation
 *		so we don't need to worry about indices or locks.)
 * ----------------------------------------------------------------
 */
static void
ExecSelect(TupleTableSlot *slot,
		   DestReceiver *dest,
		   EState *estate)
{
	/*
	 * insert the tuple into the "into relation"
	 *
	 * XXX this probably ought to be replaced by a separate destination
	 */
	if (estate->es_into_relation_descriptor != NULL)
	{
		HeapTuple	tuple;

		tuple = ExecCopySlotTuple(slot);
		heap_insert(estate->es_into_relation_descriptor, tuple,
					estate->es_snapshot->curcid,
					estate->es_into_relation_use_wal,
					false);		/* never any point in using FSM */
		/* we know there are no indexes to update */
		heap_freetuple(tuple);
		IncrAppended();
	}

	/*
	 * send the tuple to the destination
	 */
	(*dest->receiveSlot) (slot, dest);
	IncrRetrieved();
	(estate->es_processed)++;
}

/* ----------------------------------------------------------------
 *		ExecInsert
 *
 *		INSERTs are trickier.. we have to insert the tuple into
 *		the base relation and insert appropriate tuples into the
 *		index relations.
 * ----------------------------------------------------------------
 */
static void
ExecInsert(TupleTableSlot *slot,
		   ItemPointer tupleid,
		   EState *estate)
{
	HeapTuple	tuple;
	ResultRelInfo *resultRelInfo;
	Relation	resultRelationDesc;
	Oid			newId;

	/*
	 * get the heap tuple out of the tuple table slot, making sure we have a
	 * writable copy
	 */
	tuple = ExecMaterializeSlot(slot);

	/*
	 * get information on the (current) result relation
	 */
	resultRelInfo = estate->es_result_relation_info;
	resultRelationDesc = resultRelInfo->ri_RelationDesc;

	/* BEFORE ROW INSERT Triggers */
	if (resultRelInfo->ri_TrigDesc &&
		resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_INSERT] > 0)
	{
		HeapTuple	newtuple;

		newtuple = ExecBRInsertTriggers(estate, resultRelInfo, tuple);

		if (newtuple == NULL)	/* "do nothing" */
			return;

		if (newtuple != tuple)	/* modified by Trigger(s) */
		{
			/*
			 * Put the modified tuple into a slot for convenience of routines
			 * below.  We assume the tuple was allocated in per-tuple memory
			 * context, and therefore will go away by itself. The tuple table
			 * slot should not try to clear it.
			 */
			TupleTableSlot *newslot = estate->es_trig_tuple_slot;

			if (newslot->tts_tupleDescriptor != slot->tts_tupleDescriptor)
				ExecSetSlotDescriptor(newslot,
									  slot->tts_tupleDescriptor,
									  false);
			ExecStoreTuple(newtuple, newslot, InvalidBuffer, false);
			slot = newslot;
			tuple = newtuple;
		}
	}

	/*
	 * Check the constraints of the tuple
	 */
	if (resultRelationDesc->rd_att->constr)
		ExecConstraints(resultRelInfo, slot, estate);

	/*
	 * insert the tuple
	 *
	 * Note: heap_insert returns the tid (location) of the new tuple in the
	 * t_self field.
	 */
	newId = heap_insert(resultRelationDesc, tuple,
						estate->es_snapshot->curcid,
						true, true);

	IncrAppended();
	(estate->es_processed)++;
	estate->es_lastoid = newId;
	setLastTid(&(tuple->t_self));

	/*
	 * insert index entries for tuple
	 */
	if (resultRelInfo->ri_NumIndices > 0)
		ExecInsertIndexTuples(slot, &(tuple->t_self), estate, false);

	/* AFTER ROW INSERT Triggers */
	ExecARInsertTriggers(estate, resultRelInfo, tuple);
}

/* ----------------------------------------------------------------
 *		ExecDelete
 *
 *		DELETE is like UPDATE, except that we delete the tuple and no
 *		index modifications are needed
 * ----------------------------------------------------------------
 */
static void
ExecDelete(TupleTableSlot *slot,
		   ItemPointer tupleid,
		   EState *estate)
{
	ResultRelInfo *resultRelInfo;
	Relation	resultRelationDesc;
	HTSU_Result result;
	ItemPointerData update_ctid;
	TransactionId update_xmax;

	/*
	 * get information on the (current) result relation
	 */
	resultRelInfo = estate->es_result_relation_info;
	resultRelationDesc = resultRelInfo->ri_RelationDesc;

	/* BEFORE ROW DELETE Triggers */
	if (resultRelInfo->ri_TrigDesc &&
		resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_DELETE] > 0)
	{
		bool		dodelete;

		dodelete = ExecBRDeleteTriggers(estate, resultRelInfo, tupleid,
										estate->es_snapshot->curcid);

		if (!dodelete)			/* "do nothing" */
			return;
	}

	/*
	 * delete the tuple
	 *
	 * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check that
	 * the row to be deleted is visible to that snapshot, and throw a can't-
	 * serialize error if not.	This is a special-case behavior needed for
	 * referential integrity updates in serializable transactions.
	 */
ldelete:;
	result = heap_delete(resultRelationDesc, tupleid,
						 &update_ctid, &update_xmax,
						 estate->es_snapshot->curcid,
						 estate->es_crosscheck_snapshot,
						 true /* wait for commit */ );
	switch (result)
	{
		case HeapTupleSelfUpdated:
			/* already deleted by self; nothing to do */
			return;

		case HeapTupleMayBeUpdated:
			break;

		case HeapTupleUpdated:
			if (IsXactIsoLevelSerializable)
				ereport(ERROR,
						(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
						 errmsg("could not serialize access due to concurrent update")));
			else if (!ItemPointerEquals(tupleid, &update_ctid))
			{
				TupleTableSlot *epqslot;

				epqslot = EvalPlanQual(estate,
									   resultRelInfo->ri_RangeTableIndex,
									   &update_ctid,
									   update_xmax);
				if (!TupIsNull(epqslot))
				{
					*tupleid = update_ctid;
					goto ldelete;
				}
			}
			/* tuple already deleted; nothing to do */
			return;

		default:
			elog(ERROR, "unrecognized heap_delete status: %u", result);
			return;
	}

	IncrDeleted();
	(estate->es_processed)++;

	/*
	 * Note: Normally one would think that we have to delete index tuples
	 * associated with the heap tuple now...
	 *
	 * ... but in POSTGRES, we have no need to do this because VACUUM will
	 * take care of it later.  We can't delete index tuples immediately
	 * anyway, since the tuple is still visible to other transactions.
	 */

	/* AFTER ROW DELETE Triggers */
	ExecARDeleteTriggers(estate, resultRelInfo, tupleid);
}

/* ----------------------------------------------------------------
 *		ExecUpdate
 *
 *		note: we can't run UPDATE queries with transactions
 *		off because UPDATEs are actually INSERTs and our
 *		scan will mistakenly loop forever, updating the tuple
 *		it just inserted..	This should be fixed but until it
 *		is, we don't want to get stuck in an infinite loop
 *		which corrupts your database..
 * ----------------------------------------------------------------
 */
static void
ExecUpdate(TupleTableSlot *slot,
		   ItemPointer tupleid,
		   EState *estate)
{
	HeapTuple	tuple;
	ResultRelInfo *resultRelInfo;
	Relation	resultRelationDesc;
	HTSU_Result result;
	ItemPointerData update_ctid;
	TransactionId update_xmax;

	/*
	 * abort the operation if not running transactions
	 */
	if (IsBootstrapProcessingMode())
		elog(ERROR, "cannot UPDATE during bootstrap");

	/*
	 * get the heap tuple out of the tuple table slot, making sure we have a
	 * writable copy
	 */
	tuple = ExecMaterializeSlot(slot);

	/*
	 * get information on the (current) result relation
	 */
	resultRelInfo = estate->es_result_relation_info;
	resultRelationDesc = resultRelInfo->ri_RelationDesc;

	/* BEFORE ROW UPDATE Triggers */
	if (resultRelInfo->ri_TrigDesc &&
		resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_UPDATE] > 0)
	{
		HeapTuple	newtuple;

		newtuple = ExecBRUpdateTriggers(estate, resultRelInfo,
										tupleid, tuple,
										estate->es_snapshot->curcid);

		if (newtuple == NULL)	/* "do nothing" */
			return;

		if (newtuple != tuple)	/* modified by Trigger(s) */
		{
			/*
			 * Put the modified tuple into a slot for convenience of routines
			 * below.  We assume the tuple was allocated in per-tuple memory
			 * context, and therefore will go away by itself. The tuple table
			 * slot should not try to clear it.
			 */
			TupleTableSlot *newslot = estate->es_trig_tuple_slot;

			if (newslot->tts_tupleDescriptor != slot->tts_tupleDescriptor)
				ExecSetSlotDescriptor(newslot,
									  slot->tts_tupleDescriptor,
									  false);
			ExecStoreTuple(newtuple, newslot, InvalidBuffer, false);
			slot = newslot;
			tuple = newtuple;
		}
	}

	/*
	 * Check the constraints of the tuple
	 *
	 * If we generate a new candidate tuple after EvalPlanQual testing, we
	 * must loop back here and recheck constraints.  (We don't need to redo
	 * triggers, however.  If there are any BEFORE triggers then trigger.c
	 * will have done heap_lock_tuple to lock the correct tuple, so there's no
	 * need to do them again.)
	 */
lreplace:;
	if (resultRelationDesc->rd_att->constr)
		ExecConstraints(resultRelInfo, slot, estate);

	/*
	 * replace the heap tuple
	 *
	 * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check that
	 * the row to be updated is visible to that snapshot, and throw a can't-
	 * serialize error if not.	This is a special-case behavior needed for
	 * referential integrity updates in serializable transactions.
	 */
	result = heap_update(resultRelationDesc, tupleid, tuple,
						 &update_ctid, &update_xmax,
						 estate->es_snapshot->curcid,
						 estate->es_crosscheck_snapshot,
						 true /* wait for commit */ );
	switch (result)
	{
		case HeapTupleSelfUpdated:
			/* already deleted by self; nothing to do */
			return;

		case HeapTupleMayBeUpdated:
			break;

		case HeapTupleUpdated:
1671
			if (IsXactIsoLevelSerializable)
1672 1673
				ereport(ERROR,
						(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
1674
						 errmsg("could not serialize access due to concurrent update")));
1675
			else if (!ItemPointerEquals(tupleid, &update_ctid))
1676
			{
1677
				TupleTableSlot *epqslot;
1678

1679 1680 1681 1682
				epqslot = EvalPlanQual(estate,
									   resultRelInfo->ri_RangeTableIndex,
									   &update_ctid,
									   update_xmax);
V
Vadim B. Mikheev 已提交
1683
				if (!TupIsNull(epqslot))
1684
				{
1685
					*tupleid = update_ctid;
1686 1687
					slot = ExecFilterJunk(estate->es_junkFilter, epqslot);
					tuple = ExecMaterializeSlot(slot);
1688 1689 1690
					goto lreplace;
				}
			}
1691
			/* tuple already deleted; nothing to do */
V
Vadim B. Mikheev 已提交
1692 1693 1694
			return;

		default:
1695
			elog(ERROR, "unrecognized heap_update status: %u", result);
V
Vadim B. Mikheev 已提交
1696
			return;
1697 1698 1699 1700 1701
	}

	IncrReplaced();
	(estate->es_processed)++;

	/*
	 * Note: instead of having to update the old index tuples associated with
	 * the heap tuple, all we do is form and insert new index tuples. This is
	 * because UPDATEs are actually DELETEs and INSERTs, and index tuple
	 * deletion is done later by VACUUM (see notes in ExecDelete).  All we do
	 * here is insert new index tuples.  -cim 9/27/89
	 */

	/*
	 * insert index entries for tuple
	 *
	 * Note: heap_update returns the tid (location) of the new tuple in the
	 * t_self field.
	 */
	if (resultRelInfo->ri_NumIndices > 0)
		ExecInsertIndexTuples(slot, &(tuple->t_self), estate, false);

	/* AFTER ROW UPDATE Triggers */
	ExecARUpdateTriggers(estate, resultRelInfo, tupleid, tuple);
}

static const char *
ExecRelCheck(ResultRelInfo *resultRelInfo,
			 TupleTableSlot *slot, EState *estate)
{
	Relation	rel = resultRelInfo->ri_RelationDesc;
	int			ncheck = rel->rd_att->constr->num_check;
	ConstrCheck *check = rel->rd_att->constr->check;
	ExprContext *econtext;
	MemoryContext oldContext;
	List	   *qual;
	int			i;

	/*
	 * If first time through for this result relation, build expression
	 * nodetrees for rel's constraint expressions.  Keep them in the per-query
	 * memory context so they'll survive throughout the query.
	 */
	if (resultRelInfo->ri_ConstraintExprs == NULL)
	{
		oldContext = MemoryContextSwitchTo(estate->es_query_cxt);
		resultRelInfo->ri_ConstraintExprs =
			(List **) palloc(ncheck * sizeof(List *));
		for (i = 0; i < ncheck; i++)
		{
			/* ExecQual wants implicit-AND form */
			qual = make_ands_implicit(stringToNode(check[i].ccbin));
			resultRelInfo->ri_ConstraintExprs[i] = (List *)
				ExecPrepareExpr((Expr *) qual, estate);
		}
		MemoryContextSwitchTo(oldContext);
	}

	/*
	 * We will use the EState's per-tuple context for evaluating constraint
	 * expressions (creating it if it's not already there).
	 */
	econtext = GetPerTupleExprContext(estate);

	/* Arrange for econtext's scan tuple to be the tuple under test */
	econtext->ecxt_scantuple = slot;

	/* And evaluate the constraints */
	for (i = 0; i < ncheck; i++)
	{
		qual = resultRelInfo->ri_ConstraintExprs[i];

		/*
		 * NOTE: SQL92 specifies that a NULL result from a constraint
		 * expression is not to be treated as a failure.  Therefore, tell
		 * ExecQual to return TRUE for NULL.
		 */
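		/*
		 * Illustrative example (hypothetical constraint): with
		 * CHECK (price > 0), a row whose price is NULL makes the expression
		 * evaluate to NULL, so ExecQual treats it as satisfied rather than
		 * as a violation.
		 */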
		if (!ExecQual(qual, econtext, true))
			return check[i].ccname;
	}

	/* NULL result means no error */
	return NULL;
}

void
ExecConstraints(ResultRelInfo *resultRelInfo,
				TupleTableSlot *slot, EState *estate)
{
	Relation	rel = resultRelInfo->ri_RelationDesc;
	TupleConstr *constr = rel->rd_att->constr;

	Assert(constr);

	if (constr->has_not_null)
	{
		int			natts = rel->rd_att->natts;
		int			attrChk;

		for (attrChk = 1; attrChk <= natts; attrChk++)
		{
			if (rel->rd_att->attrs[attrChk - 1]->attnotnull &&
				slot_attisnull(slot, attrChk))
				ereport(ERROR,
						(errcode(ERRCODE_NOT_NULL_VIOLATION),
						 errmsg("null value in column \"%s\" violates not-null constraint",
						NameStr(rel->rd_att->attrs[attrChk - 1]->attname))));
		}
	}

	if (constr->num_check > 0)
	{
		const char *failed;

		if ((failed = ExecRelCheck(resultRelInfo, slot, estate)) != NULL)
			ereport(ERROR,
					(errcode(ERRCODE_CHECK_VIOLATION),
					 errmsg("new row for relation \"%s\" violates check constraint \"%s\"",
							RelationGetRelationName(rel), failed)));
	}
}
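/*
 * Illustrative example (hypothetical table): given
 *		CREATE TABLE t (id int NOT NULL, price int CHECK (price > 0));
 * inserting (NULL, 1) fails in the has_not_null loop above with a not-null
 * violation, while inserting (1, -5) fails via ExecRelCheck, which returns
 * the name of the offending CHECK constraint for the error message.
 */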

/*
 * Check a modified tuple to see if we want to process its updated version
 * under READ COMMITTED rules.
 *
 * See backend/executor/README for some info about how this works.
 *
 *	estate - executor state data
 *	rti - rangetable index of table containing tuple
 *	*tid - t_ctid from the outdated tuple (ie, next updated version)
 *	priorXmax - t_xmax from the outdated tuple
 *
 * *tid is also an output parameter: it's modified to hold the TID of the
 * latest version of the tuple (note this may be changed even on failure)
 *
 * Returns a slot containing the new candidate update/delete tuple, or
 * NULL if we determine we shouldn't process the row.
 */
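/*
 * Illustrative scenario (hypothetical table and column names): under READ
 * COMMITTED, suppose another session commits
 *		UPDATE accounts SET balance = balance - 100 WHERE id = 1;
 * while our own UPDATE of the same row is in progress.  Our heap_update
 * returns HeapTupleUpdated, ExecUpdate calls this function with the old
 * tuple's ctid and xmax, and we follow the update chain to the newest
 * committed version.  If the recheck plan still returns a row for that
 * version, the caller re-applies its update to it; otherwise the row is
 * skipped.
 */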
TupleTableSlot *
EvalPlanQual(EState *estate, Index rti,
			 ItemPointer tid, TransactionId priorXmax)
{
	evalPlanQual *epq;
	EState	   *epqstate;
	Relation	relation;
	HeapTupleData tuple;
	HeapTuple	copyTuple = NULL;
	bool		endNode;

	Assert(rti != 0);

	/*
	 * find relation containing target tuple
	 */
	if (estate->es_result_relation_info != NULL &&
		estate->es_result_relation_info->ri_RangeTableIndex == rti)
		relation = estate->es_result_relation_info->ri_RelationDesc;
	else
	{
		ListCell   *l;

		relation = NULL;
		foreach(l, estate->es_rowMarks)
		{
			if (((execRowMark *) lfirst(l))->rti == rti)
			{
				relation = ((execRowMark *) lfirst(l))->relation;
				break;
			}
		}
		if (relation == NULL)
			elog(ERROR, "could not find RowMark for RT index %u", rti);
	}

	/*
	 * fetch tid tuple
	 *
	 * Loop here to deal with updated or busy tuples
	 */
	tuple.t_self = *tid;
	for (;;)
	{
		Buffer		buffer;

		if (heap_fetch(relation, SnapshotDirty, &tuple, &buffer, true, NULL))
		{
			/*
			 * If xmin isn't what we're expecting, the slot must have been
			 * recycled and reused for an unrelated tuple.  This implies that
			 * the latest version of the row was deleted, so we need do
			 * nothing.  (Should be safe to examine xmin without getting
			 * buffer's content lock, since xmin never changes in an existing
			 * tuple.)
			 */
			if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
									 priorXmax))
			{
				ReleaseBuffer(buffer);
				return NULL;
			}

			/* otherwise xmin should not be dirty... */
			if (TransactionIdIsValid(SnapshotDirty->xmin))
				elog(ERROR, "t_xmin is uncommitted in tuple to be updated");

			/*
			 * If the tuple is being updated by another transaction, we have
			 * to wait for its commit/abort.
			 */
			if (TransactionIdIsValid(SnapshotDirty->xmax))
			{
				ReleaseBuffer(buffer);
				XactLockTableWait(SnapshotDirty->xmax);
				continue;		/* loop back to repeat heap_fetch */
			}

			/*
			 * We got tuple - now copy it for use by recheck query.
			 */
			copyTuple = heap_copytuple(&tuple);
			ReleaseBuffer(buffer);
			break;
		}

		/*
		 * If the referenced slot was actually empty, the latest version of
		 * the row must have been deleted, so we need do nothing.
		 */
		if (tuple.t_data == NULL)
		{
			ReleaseBuffer(buffer);
			return NULL;
		}

		/*
		 * As above, if xmin isn't what we're expecting, do nothing.
		 */
		if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
								 priorXmax))
		{
			ReleaseBuffer(buffer);
			return NULL;
		}

		/*
		 * If we get here, the tuple was found but failed SnapshotDirty.
		 * Assuming the xmin is either a committed xact or our own xact (as it
		 * certainly should be if we're trying to modify the tuple), this must
		 * mean that the row was updated or deleted by either a committed xact
		 * or our own xact.  If it was deleted, we can ignore it; if it was
		 * updated then chain up to the next version and repeat the whole
		 * test.
		 *
		 * As above, it should be safe to examine xmax and t_ctid without the
		 * buffer content lock, because they can't be changing.
		 */
		if (ItemPointerEquals(&tuple.t_self, &tuple.t_data->t_ctid))
		{
			/* deleted, so forget about it */
			ReleaseBuffer(buffer);
			return NULL;
		}

		/* updated, so look at the updated row */
		tuple.t_self = tuple.t_data->t_ctid;
		/* updated row should have xmin matching this xmax */
		priorXmax = HeapTupleHeaderGetXmax(tuple.t_data);
		ReleaseBuffer(buffer);
		/* loop back to fetch next in chain */
	}

	/*
	 * For UPDATE/DELETE we have to return the tid of the actual row we're
	 * executing PQ for.
	 */
	*tid = tuple.t_self;

	/*
	 * Need to run a recheck subquery.  Find or create a PQ stack entry.
	 */
	epq = estate->es_evalPlanQual;
	endNode = true;

	if (epq != NULL && epq->rti == 0)
	{
		/* Top PQ stack entry is idle, so re-use it */
		Assert(!(estate->es_useEvalPlan) && epq->next == NULL);
		epq->rti = rti;
		endNode = false;
	}

	/*
	 * If this is a request for another RTE (call it Ra), check whether a
	 * PlanQual was already requested for Ra.  If so, Ra's row has been
	 * updated again, so we must restart the old execution for Ra and discard
	 * everything done since Ra was suspended.
	 */
	if (epq != NULL && epq->rti != rti &&
		epq->estate->es_evTuple[rti - 1] != NULL)
	{
		do
		{
			evalPlanQual *oldepq;

			/* stop execution */
			EvalPlanQualStop(epq);
			/* pop previous PlanQual from the stack */
			oldepq = epq->next;
			Assert(oldepq && oldepq->rti != 0);
			/* push current PQ to freePQ stack */
			oldepq->free = epq;
			epq = oldepq;
			estate->es_evalPlanQual = epq;
		} while (epq->rti != rti);
	}

	/*
	 * If we are requested for another RTE then we have to suspend execution
	 * of the current PlanQual and start execution for the new one.
	 */
	if (epq == NULL || epq->rti != rti)
	{
		/* try to reuse plan used previously */
		evalPlanQual *newepq = (epq != NULL) ? epq->free : NULL;

		if (newepq == NULL)		/* first call or freePQ stack is empty */
		{
			newepq = (evalPlanQual *) palloc0(sizeof(evalPlanQual));
			newepq->free = NULL;
			newepq->estate = NULL;
			newepq->planstate = NULL;
		}
		else
		{
			/* recycle previously used PlanQual */
			Assert(newepq->estate == NULL);
			epq->free = NULL;
		}
		/* push current PQ to the stack */
		newepq->next = epq;
		epq = newepq;
		estate->es_evalPlanQual = epq;
		epq->rti = rti;
		endNode = false;
	}

	Assert(epq->rti == rti);

	/*
	 * Ok - we're requested for the same RTE.  Unfortunately we still have to
	 * end and restart execution of the plan, because ExecReScan wouldn't
	 * ensure that upper plan nodes would reset themselves.  We could make
	 * that work if insertion of the target tuple were integrated with the
	 * Param mechanism somehow, so that the upper plan nodes know that their
	 * children's outputs have changed.
	 *
	 * Note that the stack of free evalPlanQual nodes is quite useless at the
	 * moment, since it only saves us from pallocing/releasing the
	 * evalPlanQual nodes themselves.  But it will be useful once we implement
	 * ReScan instead of end/restart for re-using PlanQual nodes.
	 */
	if (endNode)
	{
		/* stop execution */
		EvalPlanQualStop(epq);
	}

	/*
	 * Initialize new recheck query.
	 *
	 * Note: if we were re-using PlanQual plans via ExecReScan, we'd need to
	 * instead copy down changeable state from the top plan (including
	 * es_result_relation_info, es_junkFilter) and reset locally changeable
	 * state in the epq (including es_param_exec_vals, es_evTupleNull).
	 */
	EvalPlanQualStart(epq, estate, epq->next);

	/*
	 * free the old RTE's tuple, if any, and store the target tuple where the
	 * relation's scan node will see it
	 */
	epqstate = epq->estate;
	if (epqstate->es_evTuple[rti - 1] != NULL)
		heap_freetuple(epqstate->es_evTuple[rti - 1]);
	epqstate->es_evTuple[rti - 1] = copyTuple;

	return EvalPlanQualNext(estate);
}

static TupleTableSlot *
EvalPlanQualNext(EState *estate)
{
	evalPlanQual *epq = estate->es_evalPlanQual;
	MemoryContext oldcontext;
	TupleTableSlot *slot;

	Assert(epq->rti != 0);

lpqnext:;
	oldcontext = MemoryContextSwitchTo(epq->estate->es_query_cxt);
	slot = ExecProcNode(epq->planstate);
	MemoryContextSwitchTo(oldcontext);

	/*
	 * No more tuples for this PQ. Continue previous one.
	 */
	if (TupIsNull(slot))
	{
		evalPlanQual *oldepq;

		/* stop execution */
		EvalPlanQualStop(epq);
		/* pop old PQ from the stack */
		oldepq = epq->next;
		if (oldepq == NULL)
		{
			/* this is the first (oldest) PQ - mark as free */
			epq->rti = 0;
			estate->es_useEvalPlan = false;
			/* and continue Query execution */
			return (NULL);
		}
		Assert(oldepq->rti != 0);
		/* push current PQ to freePQ stack */
		oldepq->free = epq;
		epq = oldepq;
		estate->es_evalPlanQual = epq;
		goto lpqnext;
	}

	return (slot);
}

static void
EndEvalPlanQual(EState *estate)
{
	evalPlanQual *epq = estate->es_evalPlanQual;

	if (epq->rti == 0)			/* plans already shut down */
	{
		Assert(epq->next == NULL);
		return;
	}

	for (;;)
	{
		evalPlanQual *oldepq;

		/* stop execution */
		EvalPlanQualStop(epq);
		/* pop old PQ from the stack */
		oldepq = epq->next;
		if (oldepq == NULL)
		{
			/* this is the first (oldest) PQ - mark as free */
			epq->rti = 0;
			estate->es_useEvalPlan = false;
			break;
		}
		Assert(oldepq->rti != 0);
		/* push current PQ to freePQ stack */
		oldepq->free = epq;
		epq = oldepq;
		estate->es_evalPlanQual = epq;
	}
}

/*
 * Start execution of one level of PlanQual.
 *
 * This is a cut-down version of ExecutorStart(): we copy some state from
 * the top-level estate rather than initializing it fresh.
 */
static void
EvalPlanQualStart(evalPlanQual *epq, EState *estate, evalPlanQual *priorepq)
{
	EState	   *epqstate;
	int			rtsize;
	MemoryContext oldcontext;

	rtsize = list_length(estate->es_range_table);

	epq->estate = epqstate = CreateExecutorState();

	oldcontext = MemoryContextSwitchTo(epqstate->es_query_cxt);

	/*
	 * The epqstates share the top query's copy of unchanging state such as
	 * the snapshot, rangetable, result-rel info, and external Param info.
	 * They need their own copies of local state, including a tuple table,
	 * es_param_exec_vals, etc.
	 */
	epqstate->es_direction = ForwardScanDirection;
	epqstate->es_snapshot = estate->es_snapshot;
	epqstate->es_crosscheck_snapshot = estate->es_crosscheck_snapshot;
	epqstate->es_range_table = estate->es_range_table;
	epqstate->es_result_relations = estate->es_result_relations;
	epqstate->es_num_result_relations = estate->es_num_result_relations;
	epqstate->es_result_relation_info = estate->es_result_relation_info;
	epqstate->es_junkFilter = estate->es_junkFilter;
	epqstate->es_into_relation_descriptor = estate->es_into_relation_descriptor;
	epqstate->es_into_relation_use_wal = estate->es_into_relation_use_wal;
	epqstate->es_param_list_info = estate->es_param_list_info;
	if (estate->es_topPlan->nParamExec > 0)
		epqstate->es_param_exec_vals = (ParamExecData *)
			palloc0(estate->es_topPlan->nParamExec * sizeof(ParamExecData));
	epqstate->es_rowMarks = estate->es_rowMarks;
	epqstate->es_forUpdate = estate->es_forUpdate;
	epqstate->es_rowNoWait = estate->es_rowNoWait;
	epqstate->es_instrument = estate->es_instrument;
	epqstate->es_select_into = estate->es_select_into;
	epqstate->es_into_oids = estate->es_into_oids;
	epqstate->es_topPlan = estate->es_topPlan;

	/*
	 * Each epqstate must have its own es_evTupleNull state, but all the stack
	 * entries share es_evTuple state.  This allows sub-rechecks to inherit
	 * the value being examined by an outer recheck.
	 */
	epqstate->es_evTupleNull = (bool *) palloc0(rtsize * sizeof(bool));
	if (priorepq == NULL)
		/* first PQ stack entry */
		epqstate->es_evTuple = (HeapTuple *)
			palloc0(rtsize * sizeof(HeapTuple));
	else
		/* later stack entries share the same storage */
		epqstate->es_evTuple = priorepq->estate->es_evTuple;

	epqstate->es_tupleTable =
		ExecCreateTupleTable(estate->es_tupleTable->size);

	epq->planstate = ExecInitNode(estate->es_topPlan, epqstate);

	MemoryContextSwitchTo(oldcontext);
}

/*
 * End execution of one level of PlanQual.
 *
 * This is a cut-down version of ExecutorEnd(); basically we want to do most
 * of the normal cleanup, but *not* close result relations (which we are
 * just sharing from the outer query).
 */
static void
EvalPlanQualStop(evalPlanQual *epq)
{
	EState	   *epqstate = epq->estate;
	MemoryContext oldcontext;

	oldcontext = MemoryContextSwitchTo(epqstate->es_query_cxt);

	ExecEndNode(epq->planstate);

	ExecDropTupleTable(epqstate->es_tupleTable, true);
	epqstate->es_tupleTable = NULL;

	if (epqstate->es_evTuple[epq->rti - 1] != NULL)
	{
		heap_freetuple(epqstate->es_evTuple[epq->rti - 1]);
		epqstate->es_evTuple[epq->rti - 1] = NULL;
	}

	MemoryContextSwitchTo(oldcontext);

	FreeExecutorState(epqstate);

	epq->estate = NULL;
	epq->planstate = NULL;
}