/*-------------------------------------------------------------------------
 *
 * execMain.c
 *	  top level executor interface routines
 *
 * INTERFACE ROUTINES
 *	ExecutorStart()
 *	ExecutorRun()
 *	ExecutorEnd()
 *
 *	The old ExecutorMain() has been replaced by ExecutorStart(),
 *	ExecutorRun() and ExecutorEnd()
 *
 *	These three procedures are the external interfaces to the executor.
 *	In each case, the query descriptor is required as an argument.
 *
 *	ExecutorStart() must be called at the beginning of execution of any
 *	query plan and ExecutorEnd() should always be called at the end of
 *	execution of a plan.
 *
 *	ExecutorRun accepts direction and count arguments that specify whether
 *	the plan is to be executed forwards, backwards, and for how many tuples.
 *
 * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/backend/executor/execMain.c,v 1.276 2006/07/14 14:52:18 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
33 34
#include "postgres.h"

35
#include "access/heapam.h"
36
#include "access/reloptions.h"
37 38
#include "access/transam.h"
#include "access/xact.h"
39
#include "catalog/heap.h"
40
#include "catalog/namespace.h"
41
#include "commands/tablecmds.h"
42
#include "commands/tablespace.h"
43
#include "commands/trigger.h"
B
Bruce Momjian 已提交
44
#include "executor/execdebug.h"
45
#include "executor/instrument.h"
B
Bruce Momjian 已提交
46
#include "miscadmin.h"
47
#include "optimizer/clauses.h"
48
#include "parser/parse_clause.h"
49
#include "parser/parsetree.h"
50
#include "storage/smgr.h"
B
Bruce Momjian 已提交
51
#include "utils/acl.h"
52
#include "utils/lsyscache.h"
53
#include "utils/memutils.h"
54

55

56 57 58 59 60 61 62 63 64
/*
 * Per-relation state used by the EvalPlanQual machinery (re-evaluating a
 * plan qual after a concurrent update).  Entries are chained two ways:
 * 'next' forms the stack of currently-active rechecks, while 'free' links
 * previously-used entries kept around for reuse.
 */
typedef struct evalPlanQual
{
	Index		rti;			/* rangetable index; NOTE(review): presumably
								 * identifies the target relation — confirm
								 * against EvalPlanQual callers */
	EState	   *estate;			/* private executor state for the recheck */
	PlanState  *planstate;		/* plan tree re-initialized for the recheck */
	struct evalPlanQual *next;	/* stack of active PlanQual plans */
	struct evalPlanQual *free;	/* list of free PlanQual plans */
} evalPlanQual;

65
/* decls for local routines only used within this module */
66
static void InitPlan(QueryDesc *queryDesc, int eflags);
67
static void initResultRelInfo(ResultRelInfo *resultRelInfo,
B
Bruce Momjian 已提交
68 69
				  Index resultRelationIndex,
				  List *rangeTable,
70 71
				  CmdType operation,
				  bool doInstrument);
72
static TupleTableSlot *ExecutePlan(EState *estate, PlanState *planstate,
B
Bruce Momjian 已提交
73 74 75
			CmdType operation,
			long numberTuples,
			ScanDirection direction,
76
			DestReceiver *dest);
77
static void ExecSelect(TupleTableSlot *slot,
78
		   DestReceiver *dest,
B
Bruce Momjian 已提交
79
		   EState *estate);
80
static void ExecInsert(TupleTableSlot *slot, ItemPointer tupleid,
B
Bruce Momjian 已提交
81
		   EState *estate);
82
static void ExecDelete(TupleTableSlot *slot, ItemPointer tupleid,
B
Bruce Momjian 已提交
83
		   EState *estate);
84
static void ExecUpdate(TupleTableSlot *slot, ItemPointer tupleid,
B
Bruce Momjian 已提交
85
		   EState *estate);
86
static TupleTableSlot *EvalPlanQualNext(EState *estate);
87
static void EndEvalPlanQual(EState *estate);
88 89
static void ExecCheckRTEPerms(RangeTblEntry *rte);
static void ExecCheckXactReadOnly(Query *parsetree);
90
static void EvalPlanQualStart(evalPlanQual *epq, EState *estate,
B
Bruce Momjian 已提交
91
				  evalPlanQual *priorepq);
92
static void EvalPlanQualStop(evalPlanQual *epq);
93

94 95
/* end of local decls */

96

97
/* ----------------------------------------------------------------
98 99 100 101 102
 *		ExecutorStart
 *
 *		This routine must be called at the beginning of any execution of any
 *		query plan
 *
103
 * Takes a QueryDesc previously created by CreateQueryDesc (it's not real
B
Bruce Momjian 已提交
104
 * clear why we bother to separate the two functions, but...).	The tupDesc
105 106
 * field of the QueryDesc is filled in to describe the tuples that will be
 * returned, and the internal fields (estate and planstate) are set up.
107
 *
108
 * eflags contains flag bits as described in executor.h.
109
 *
110 111
 * NB: the CurrentMemoryContext when this is called will become the parent
 * of the per-query context used for this Executor invocation.
112 113
 * ----------------------------------------------------------------
 */
114
void
ExecutorStart(QueryDesc *queryDesc, int eflags)
{
	EState	   *estate;
	MemoryContext oldcontext;

	/* sanity checks: queryDesc must not be started already */
	Assert(queryDesc != NULL);
	Assert(queryDesc->estate == NULL);

	/*
	 * If the transaction is read-only, we need to check if any writes are
	 * planned to non-temporary tables.  EXPLAIN is considered read-only.
	 * (eflags bits are described in executor.h.)
	 */
	if (XactReadOnly && !(eflags & EXEC_FLAG_EXPLAIN_ONLY))
		ExecCheckXactReadOnly(queryDesc->parsetree);

	/*
	 * Build EState, switch into per-query memory context for startup.
	 * Everything allocated below lives in es_query_cxt and is released
	 * by FreeExecutorState() in ExecutorEnd().
	 */
	estate = CreateExecutorState();
	queryDesc->estate = estate;

	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	/*
	 * Fill in parameters, if any, from queryDesc
	 */
	estate->es_param_list_info = queryDesc->params;

	/* palloc0 zeroes the exec-time parameter array */
	if (queryDesc->plantree->nParamExec > 0)
		estate->es_param_exec_vals = (ParamExecData *)
			palloc0(queryDesc->plantree->nParamExec * sizeof(ParamExecData));

	/*
	 * Copy other important information into the EState
	 */
	estate->es_snapshot = queryDesc->snapshot;
	estate->es_crosscheck_snapshot = queryDesc->crosscheck_snapshot;
	estate->es_instrument = queryDesc->doInstrument;

	/*
	 * Initialize the plan state tree
	 */
	InitPlan(queryDesc, eflags);

	MemoryContextSwitchTo(oldcontext);
}

/* ----------------------------------------------------------------
164 165 166 167 168 169 170
 *		ExecutorRun
 *
 *		This is the main routine of the executor module. It accepts
 *		the query descriptor from the traffic cop and executes the
 *		query plan.
 *
 *		ExecutorStart must have been called already.
171
 *
172 173 174
 *		If direction is NoMovementScanDirection then nothing is done
 *		except to start up/shut down the destination.  Otherwise,
 *		we retrieve up to 'count' tuples in the specified direction.
175
 *
176
 *		Note: count = 0 is interpreted as no portal limit, i.e., run to
177
 *		completion.
178
 *
179 180
 * ----------------------------------------------------------------
 */
181
TupleTableSlot *
ExecutorRun(QueryDesc *queryDesc,
			ScanDirection direction, long count)
{
	EState	   *estate;
	CmdType		operation;
	DestReceiver *dest;
	TupleTableSlot *result;
	MemoryContext oldcontext;

	/* sanity checks: ExecutorStart must already have run on this queryDesc */
	Assert(queryDesc != NULL);

	estate = queryDesc->estate;

	Assert(estate != NULL);

	/*
	 * Switch into per-query memory context
	 */
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	/*
	 * extract information from the query descriptor and the query feature.
	 */
	operation = queryDesc->operation;
	dest = queryDesc->dest;

	/*
	 * startup tuple receiver; reset per-run counters first
	 */
	estate->es_processed = 0;
	estate->es_lastoid = InvalidOid;

	(*dest->rStartup) (dest, operation, queryDesc->tupDesc);

	/*
	 * run plan: NoMovement means only start up / shut down the receiver;
	 * otherwise fetch up to 'count' tuples (count = 0 means run to
	 * completion) in the given direction.
	 */
	if (ScanDirectionIsNoMovement(direction))
		result = NULL;
	else
		result = ExecutePlan(estate,
							 queryDesc->planstate,
							 operation,
							 count,
							 direction,
							 dest);

	/*
	 * shutdown receiver
	 */
	(*dest->rShutdown) (dest);

	MemoryContextSwitchTo(oldcontext);

	return result;
}

/* ----------------------------------------------------------------
241 242
 *		ExecutorEnd
 *
243
 *		This routine must be called at the end of execution of any
244
 *		query plan
245 246 247
 * ----------------------------------------------------------------
 */
void
ExecutorEnd(QueryDesc *queryDesc)
{
	EState	   *estate;
	MemoryContext oldcontext;

	/* sanity checks: queryDesc must have been started */
	Assert(queryDesc != NULL);

	estate = queryDesc->estate;

	Assert(estate != NULL);

	/*
	 * Switch into per-query memory context to run ExecEndPlan
	 */
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	ExecEndPlan(queryDesc->planstate, estate);

	/*
	 * Must switch out of context before destroying it
	 */
	MemoryContextSwitchTo(oldcontext);

	/*
	 * Release EState and per-query memory context.  This should release
	 * everything the executor has allocated.
	 */
	FreeExecutorState(estate);

	/* Reset queryDesc fields that no longer point to anything */
	queryDesc->tupDesc = NULL;
	queryDesc->estate = NULL;
	queryDesc->planstate = NULL;
}
283

284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319
/* ----------------------------------------------------------------
 *		ExecutorRewind
 *
 *		This routine may be called on an open queryDesc to rewind it
 *		to the start.
 * ----------------------------------------------------------------
 */
/*
 * ExecutorRewind
 *
 * Reposition an already-started query back to its beginning so it can be
 * run again.  Only sensible for read-only (SELECT) queries.
 */
void
ExecutorRewind(QueryDesc *queryDesc)
{
	EState	   *estate;
	MemoryContext saved_cxt;

	/* caller must hand us a started query */
	Assert(queryDesc != NULL);
	estate = queryDesc->estate;
	Assert(estate != NULL);

	/* It's probably not sensible to rescan updating queries */
	Assert(queryDesc->operation == CMD_SELECT);

	/* perform the rescan inside the per-query memory context */
	saved_cxt = MemoryContextSwitchTo(estate->es_query_cxt);
	ExecReScan(queryDesc->planstate, NULL);
	MemoryContextSwitchTo(saved_cxt);
}

320

321 322 323 324
/*
 * ExecCheckRTPerms
 *		Check access permissions for all relations listed in a range table.
 */
325
void
326
ExecCheckRTPerms(List *rangeTable)
327
{
328
	ListCell   *l;
329

330
	foreach(l, rangeTable)
331
	{
332
		RangeTblEntry *rte = lfirst(l);
333

334
		ExecCheckRTEPerms(rte);
335 336 337 338 339 340 341 342
	}
}

/*
 * ExecCheckRTEPerms
 *		Check access permissions for a single RTE.
 */
static void
ExecCheckRTEPerms(RangeTblEntry *rte)
{
	AclMode		requiredPerms;
	Oid			relOid;
	Oid			userid;

	/*
	 * Only plain-relation RTEs need to be checked here.  Subquery RTEs are
	 * checked by ExecInitSubqueryScan if the subquery is still a separate
	 * subquery --- if it's been pulled up into our query level then the RTEs
	 * are in our rangetable and will be checked here. Function RTEs are
	 * checked by init_fcache when the function is prepared for execution.
	 * Join and special RTEs need no checks.
	 */
	if (rte->rtekind != RTE_RELATION)
		return;

	/*
	 * No work if requiredPerms is empty.
	 */
	requiredPerms = rte->requiredPerms;
	if (requiredPerms == 0)
		return;

	relOid = rte->relid;

	/*
	 * userid to check as: current user unless we have a setuid indication.
	 *
	 * Note: GetUserId() is presently fast enough that there's no harm in
	 * calling it separately for each RTE.	If that stops being true, we could
	 * call it once in ExecCheckRTPerms and pass the userid down from there.
	 * But for now, no need for the extra clutter.
	 */
	userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();

	/*
	 * We must have *all* the requiredPerms bits, so use aclmask not aclcheck.
	 * aclcheck_error raises ereport(ERROR), so there is no return on failure.
	 */
	if (pg_class_aclmask(relOid, userid, requiredPerms, ACLMASK_ALL)
		!= requiredPerms)
		aclcheck_error(ACLCHECK_NO_PRIV, ACL_KIND_CLASS,
					   get_rel_name(relOid));
}

388 389 390
/*
 * Check that the query does not imply any writes to non-temp tables.
 */
391
static void
ExecCheckXactReadOnly(Query *parsetree)
{
	ListCell   *l;

	/*
	 * CREATE TABLE AS or SELECT INTO?
	 *
	 * XXX should we allow this if the destination is temp?
	 */
	if (parsetree->into != NULL)
		goto fail;

	/* Fail if write permissions are requested on any non-temp table */
	foreach(l, parsetree->rtable)
	{
		RangeTblEntry *rte = lfirst(l);

		/* recurse into subqueries; their rtables need the same check */
		if (rte->rtekind == RTE_SUBQUERY)
		{
			ExecCheckXactReadOnly(rte->subquery);
			continue;
		}

		if (rte->rtekind != RTE_RELATION)
			continue;

		/* pure SELECT access is always fine */
		if ((rte->requiredPerms & (~ACL_SELECT)) == 0)
			continue;

		/* writes to temp tables are allowed in read-only transactions */
		if (isTempNamespace(get_rel_namespace(rte->relid)))
			continue;

		goto fail;
	}

	return;

fail:
	ereport(ERROR,
			(errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION),
			 errmsg("transaction is read-only")));
}


436
/* ----------------------------------------------------------------
437 438 439 440
 *		InitPlan
 *
 *		Initializes the query plan: open files, allocate storage
 *		and start up the rule manager
441 442
 * ----------------------------------------------------------------
 */
443
static void
InitPlan(QueryDesc *queryDesc, int eflags)
{
	CmdType		operation = queryDesc->operation;
	Query	   *parseTree = queryDesc->parsetree;
	Plan	   *plan = queryDesc->plantree;
	EState	   *estate = queryDesc->estate;
	PlanState  *planstate;
	List	   *rangeTable;
	Relation	intoRelationDesc;
	bool		do_select_into;
	TupleDesc	tupType;
	ListCell   *l;

	/*
	 * Do permissions checks.  It's sufficient to examine the query's top
	 * rangetable here --- subplan RTEs will be checked during
	 * ExecInitSubPlan().
	 */
	ExecCheckRTPerms(parseTree->rtable);

	/*
	 * get information from query descriptor
	 */
	rangeTable = parseTree->rtable;

	/*
	 * initialize the node's execution state
	 */
	estate->es_range_table = rangeTable;

	/*
	 * if there is a result relation, initialize result relation stuff
	 * (i.e. this is an INSERT/UPDATE/DELETE target)
	 */
	if (parseTree->resultRelation != 0 && operation != CMD_SELECT)
	{
		List	   *resultRelations = parseTree->resultRelations;
		int			numResultRelations;
		ResultRelInfo *resultRelInfos;

		if (resultRelations != NIL)
		{
			/*
			 * Multiple result relations (due to inheritance)
			 * parseTree->resultRelations identifies them all
			 */
			ResultRelInfo *resultRelInfo;

			numResultRelations = list_length(resultRelations);
			resultRelInfos = (ResultRelInfo *)
				palloc(numResultRelations * sizeof(ResultRelInfo));
			resultRelInfo = resultRelInfos;
			foreach(l, resultRelations)
			{
				initResultRelInfo(resultRelInfo,
								  lfirst_int(l),
								  rangeTable,
								  operation,
								  estate->es_instrument);
				resultRelInfo++;
			}
		}
		else
		{
			/*
			 * Single result relation identified by parseTree->resultRelation
			 */
			numResultRelations = 1;
			resultRelInfos = (ResultRelInfo *) palloc(sizeof(ResultRelInfo));
			initResultRelInfo(resultRelInfos,
							  parseTree->resultRelation,
							  rangeTable,
							  operation,
							  estate->es_instrument);
		}

		estate->es_result_relations = resultRelInfos;
		estate->es_num_result_relations = numResultRelations;
		/* Initialize to first or only result rel */
		estate->es_result_relation_info = resultRelInfos;
	}
	else
	{
		/*
		 * if no result relation, then set state appropriately
		 */
		estate->es_result_relations = NULL;
		estate->es_num_result_relations = 0;
		estate->es_result_relation_info = NULL;
	}

	/*
	 * Detect whether we're doing SELECT INTO.  If so, set the es_into_oids
	 * flag appropriately so that the plan tree will be initialized with the
	 * correct tuple descriptors.
	 */
	do_select_into = false;

	if (operation == CMD_SELECT && parseTree->into != NULL)
	{
		do_select_into = true;
		estate->es_select_into = true;
		estate->es_into_oids = interpretOidsOption(parseTree->intoOptions);
	}

	/*
	 * Have to lock relations selected FOR UPDATE/FOR SHARE
	 */
	estate->es_rowMarks = NIL;
	foreach(l, parseTree->rowMarks)
	{
		RowMarkClause *rc = (RowMarkClause *) lfirst(l);
		Oid			relid = getrelid(rc->rti, rangeTable);
		Relation	relation;
		ExecRowMark *erm;

		/* RowShareLock is the lock level used for FOR UPDATE/SHARE */
		relation = heap_open(relid, RowShareLock);
		erm = (ExecRowMark *) palloc(sizeof(ExecRowMark));
		erm->relation = relation;
		erm->rti = rc->rti;
		erm->forUpdate = rc->forUpdate;
		erm->noWait = rc->noWait;
		/* resname identifies the junk ctid attribute for this rel */
		snprintf(erm->resname, sizeof(erm->resname), "ctid%u", rc->rti);
		estate->es_rowMarks = lappend(estate->es_rowMarks, erm);
	}

	/*
	 * initialize the executor "tuple" table.  We need slots for all the plan
	 * nodes, plus possibly output slots for the junkfilter(s). At this point
	 * we aren't sure if we need junkfilters, so just add slots for them
	 * unconditionally.  Also, if it's not a SELECT, set up a slot for use for
	 * trigger output tuples.
	 */
	{
		int			nSlots = ExecCountSlotsNode(plan);

		if (parseTree->resultRelations != NIL)
			nSlots += list_length(parseTree->resultRelations);
		else
			nSlots += 1;
		if (operation != CMD_SELECT)
			nSlots++;

		estate->es_tupleTable = ExecCreateTupleTable(nSlots);

		if (operation != CMD_SELECT)
			estate->es_trig_tuple_slot =
				ExecAllocTableSlot(estate->es_tupleTable);
	}

	/* mark EvalPlanQual not active */
	estate->es_topPlan = plan;
	estate->es_evalPlanQual = NULL;
	estate->es_evTupleNull = NULL;
	estate->es_evTuple = NULL;
	estate->es_useEvalPlan = false;

	/*
	 * initialize the private state information for all the nodes in the query
	 * tree.  This opens files, allocates storage and leaves us ready to start
	 * processing tuples.
	 */
	planstate = ExecInitNode(plan, estate, eflags);

	/*
	 * Get the tuple descriptor describing the type of tuples to return. (this
	 * is especially important if we are creating a relation with "SELECT
	 * INTO")
	 */
	tupType = ExecGetResultType(planstate);

	/*
	 * Initialize the junk filter if needed.  SELECT and INSERT queries need a
	 * filter if there are any junk attrs in the tlist.  INSERT and SELECT
	 * INTO also need a filter if the plan may return raw disk tuples (else
	 * heap_insert will be scribbling on the source relation!). UPDATE and
	 * DELETE always need a filter, since there's always a junk 'ctid'
	 * attribute present --- no need to look first.
	 */
	{
		bool		junk_filter_needed = false;
		ListCell   *tlist;

		switch (operation)
		{
			case CMD_SELECT:
			case CMD_INSERT:
				foreach(tlist, plan->targetlist)
				{
					TargetEntry *tle = (TargetEntry *) lfirst(tlist);

					if (tle->resjunk)
					{
						junk_filter_needed = true;
						break;
					}
				}
				if (!junk_filter_needed &&
					(operation == CMD_INSERT || do_select_into) &&
					ExecMayReturnRawTuples(planstate))
					junk_filter_needed = true;
				break;
			case CMD_UPDATE:
			case CMD_DELETE:
				junk_filter_needed = true;
				break;
			default:
				break;
		}

		if (junk_filter_needed)
		{
			/*
			 * If there are multiple result relations, each one needs its own
			 * junk filter.  Note this is only possible for UPDATE/DELETE, so
			 * we can't be fooled by some needing a filter and some not.
			 */
			if (parseTree->resultRelations != NIL)
			{
				PlanState **appendplans;
				int			as_nplans;
				ResultRelInfo *resultRelInfo;
				int			i;

				/* Top plan had better be an Append here. */
				Assert(IsA(plan, Append));
				Assert(((Append *) plan)->isTarget);
				Assert(IsA(planstate, AppendState));
				appendplans = ((AppendState *) planstate)->appendplans;
				as_nplans = ((AppendState *) planstate)->as_nplans;
				Assert(as_nplans == estate->es_num_result_relations);
				resultRelInfo = estate->es_result_relations;
				for (i = 0; i < as_nplans; i++)
				{
					PlanState  *subplan = appendplans[i];
					JunkFilter *j;

					j = ExecInitJunkFilter(subplan->plan->targetlist,
							resultRelInfo->ri_RelationDesc->rd_att->tdhasoid,
								  ExecAllocTableSlot(estate->es_tupleTable));
					resultRelInfo->ri_junkFilter = j;
					resultRelInfo++;
				}

				/*
				 * Set active junkfilter too; at this point ExecInitAppend has
				 * already selected an active result relation...
				 */
				estate->es_junkFilter =
					estate->es_result_relation_info->ri_junkFilter;
			}
			else
			{
				/* Normal case with just one JunkFilter */
				JunkFilter *j;

				j = ExecInitJunkFilter(planstate->plan->targetlist,
									   tupType->tdhasoid,
								  ExecAllocTableSlot(estate->es_tupleTable));
				estate->es_junkFilter = j;
				if (estate->es_result_relation_info)
					estate->es_result_relation_info->ri_junkFilter = j;

				/* For SELECT, want to return the cleaned tuple type */
				if (operation == CMD_SELECT)
					tupType = j->jf_cleanTupType;
			}
		}
		else
			estate->es_junkFilter = NULL;
	}

	/*
	 * If doing SELECT INTO, initialize the "into" relation.  We must wait
	 * till now so we have the "clean" result tuple type to create the new
	 * table from.
	 *
	 * If EXPLAIN, skip creating the "into" relation.
	 */
	intoRelationDesc = NULL;

	if (do_select_into && !(eflags & EXEC_FLAG_EXPLAIN_ONLY))
	{
		char	   *intoName;
		Oid			namespaceId;
		Oid			tablespaceId;
		Datum		reloptions;
		AclResult	aclresult;
		Oid			intoRelationId;
		TupleDesc	tupdesc;

		/*
		 * Check consistency of arguments
		 */
		if (parseTree->intoOnCommit != ONCOMMIT_NOOP && !parseTree->into->istemp)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
					 errmsg("ON COMMIT can only be used on temporary tables")));

		/*
		 * find namespace to create in, check permissions
		 */
		intoName = parseTree->into->relname;
		namespaceId = RangeVarGetCreationNamespace(parseTree->into);

		aclresult = pg_namespace_aclcheck(namespaceId, GetUserId(),
										  ACL_CREATE);
		if (aclresult != ACLCHECK_OK)
			aclcheck_error(aclresult, ACL_KIND_NAMESPACE,
						   get_namespace_name(namespaceId));

		/*
		 * Select tablespace to use.  If not specified, use default_tablespace
		 * (which may in turn default to database's default).
		 */
		if (parseTree->intoTableSpaceName)
		{
			tablespaceId = get_tablespace_oid(parseTree->intoTableSpaceName);
			if (!OidIsValid(tablespaceId))
				ereport(ERROR,
						(errcode(ERRCODE_UNDEFINED_OBJECT),
						 errmsg("tablespace \"%s\" does not exist",
								parseTree->intoTableSpaceName)));
		} else
		{
			tablespaceId = GetDefaultTablespace();
			/* note InvalidOid is OK in this case */
		}

		/* Parse and validate any reloptions */
		reloptions = transformRelOptions((Datum) 0,
										 parseTree->intoOptions,
										 true,
										 false);
		(void) heap_reloptions(RELKIND_RELATION, reloptions, true);

		/* Check permissions except when using the database's default */
		if (OidIsValid(tablespaceId))
		{
			AclResult	aclresult;

			aclresult = pg_tablespace_aclcheck(tablespaceId, GetUserId(),
											   ACL_CREATE);

			if (aclresult != ACLCHECK_OK)
				aclcheck_error(aclresult, ACL_KIND_TABLESPACE,
							   get_tablespace_name(tablespaceId));
		}

		/*
		 * have to copy tupType to get rid of constraints
		 */
		tupdesc = CreateTupleDescCopy(tupType);

		intoRelationId = heap_create_with_catalog(intoName,
												  namespaceId,
												  tablespaceId,
												  InvalidOid,
												  GetUserId(),
												  tupdesc,
												  RELKIND_RELATION,
												  false,
												  true,
												  0,
												  parseTree->intoOnCommit,
												  reloptions,
												  allowSystemTableMods);

		/* heap_create_with_catalog copied the descriptor; free ours */
		FreeTupleDesc(tupdesc);

		/*
		 * Advance command counter so that the newly-created relation's
		 * catalog tuples will be visible to heap_open.
		 */
		CommandCounterIncrement();

		/*
		 * If necessary, create a TOAST table for the into relation. Note that
		 * AlterTableCreateToastTable ends with CommandCounterIncrement(), so
		 * that the TOAST table will be visible for insertion.
		 */
		AlterTableCreateToastTable(intoRelationId, true);

		/*
		 * And open the constructed table for writing.
		 */
		intoRelationDesc = heap_open(intoRelationId, AccessExclusiveLock);

		/* use_wal off requires rd_targblock be initially invalid */
		Assert(intoRelationDesc->rd_targblock == InvalidBlockNumber);

		/*
		 * We can skip WAL-logging the insertions, unless PITR is in use.
		 *
		 * Note that for a non-temp INTO table, this is safe only because we
		 * know that the catalog changes above will have been WAL-logged, and
		 * so RecordTransactionCommit will think it needs to WAL-log the
		 * eventual transaction commit.  Else the commit might be lost, even
		 * though all the data is safely fsync'd ...
		 */
		estate->es_into_relation_use_wal = XLogArchivingActive();
	}

	estate->es_into_relation_descriptor = intoRelationDesc;

	/* hand back results the caller needs */
	queryDesc->tupDesc = tupType;
	queryDesc->planstate = planstate;
}

852 853 854 855 856 857 858
/*
 * Initialize ResultRelInfo data for one result relation
 *
 * Opens (and locks, RowExclusiveLock) the target relation named by
 * resultRelationIndex in rangeTable, rejects relation kinds that cannot
 * be INSERT/UPDATE/DELETE targets, and fills in *resultRelInfo.  Trigger
 * descriptors are copied and per-trigger FmgrInfo/instrumentation arrays
 * are allocated; index descriptors are opened except for DELETE.
 *
 * resultRelInfo: caller-supplied struct to fill (fully overwritten here)
 * resultRelationIndex: range-table index of the target relation
 * rangeTable: the query's range table
 * operation: CMD_INSERT/CMD_UPDATE/CMD_DELETE (controls index opening)
 * doInstrument: true to allocate EXPLAIN ANALYZE instrumentation for triggers
 */
static void
initResultRelInfo(ResultRelInfo *resultRelInfo,
				  Index resultRelationIndex,
				  List *rangeTable,
				  CmdType operation,
				  bool doInstrument)
{
	Oid			resultRelationOid;
	Relation	resultRelationDesc;

	/* Lock is held until transaction end (see ExecEndPlan) */
	resultRelationOid = getrelid(resultRelationIndex, rangeTable);
	resultRelationDesc = heap_open(resultRelationOid, RowExclusiveLock);

	/* Only plain relations may be modification targets */
	switch (resultRelationDesc->rd_rel->relkind)
	{
		case RELKIND_SEQUENCE:
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot change sequence \"%s\"",
							RelationGetRelationName(resultRelationDesc))));
			break;
		case RELKIND_TOASTVALUE:
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot change TOAST relation \"%s\"",
							RelationGetRelationName(resultRelationDesc))));
			break;
		case RELKIND_VIEW:
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot change view \"%s\"",
							RelationGetRelationName(resultRelationDesc))));
			break;
	}

	MemSet(resultRelInfo, 0, sizeof(ResultRelInfo));
	resultRelInfo->type = T_ResultRelInfo;
	resultRelInfo->ri_RangeTableIndex = resultRelationIndex;
	resultRelInfo->ri_RelationDesc = resultRelationDesc;
	resultRelInfo->ri_NumIndices = 0;
	resultRelInfo->ri_IndexRelationDescs = NULL;
	resultRelInfo->ri_IndexRelationInfo = NULL;
	/* make a copy so as not to depend on relcache info not changing... */
	resultRelInfo->ri_TrigDesc = CopyTriggerDesc(resultRelationDesc->trigdesc);
	if (resultRelInfo->ri_TrigDesc)
	{
		int			n = resultRelInfo->ri_TrigDesc->numtriggers;

		/* One FmgrInfo slot per trigger; zeroed so lookup is done lazily */
		resultRelInfo->ri_TrigFunctions = (FmgrInfo *)
			palloc0(n * sizeof(FmgrInfo));
		if (doInstrument)
			resultRelInfo->ri_TrigInstrument = InstrAlloc(n);
		else
			resultRelInfo->ri_TrigInstrument = NULL;
	}
	else
	{
		resultRelInfo->ri_TrigFunctions = NULL;
		resultRelInfo->ri_TrigInstrument = NULL;
	}
	resultRelInfo->ri_ConstraintExprs = NULL;
	resultRelInfo->ri_junkFilter = NULL;

	/*
	 * If there are indices on the result relation, open them and save
	 * descriptors in the result relation info, so that we can add new index
	 * entries for the tuples we add/update.  We need not do this for a
	 * DELETE, however, since deletion doesn't affect indexes.
	 */
	if (resultRelationDesc->rd_rel->relhasindex &&
		operation != CMD_DELETE)
		ExecOpenIndices(resultRelInfo);
}

929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985
/*
 *		ExecContextForcesOids
 *
 * When doing INSERT, UPDATE, or SELECT INTO, result tuples must have space
 * for an OID if and only if the target relation has OIDs.  In other
 * contexts the choice is free (we generally don't want OID space, but do
 * if a physical-tlist optimization is possible).  Returns TRUE when the
 * plan context forces the choice, storing the required value into
 * *hasoids; returns FALSE when the caller may choose.
 *
 * Note that all plan nodes in the tree will emit tuples with OID space
 * when the choice is forced, even though only the topmost node strictly
 * needs it: node types such as Sort return their inputs unchanged, so the
 * requirement propagates downward, and for now we simply apply it
 * uniformly.
 *
 * estate->es_result_relation_info is assumed to describe the target
 * relation; in an inheritance-spanning UPDATE the decision is re-made per
 * child relation.  For SELECT INTO the target's descriptor isn't available
 * here, so we consult the es_select_into/es_into_oids flags that InitPlan
 * set up instead.
 */
bool
ExecContextForcesOids(PlanState *planstate, bool *hasoids)
{
	EState	   *estate = planstate->state;
	ResultRelInfo *relinfo;

	/* SELECT INTO: decision was recorded by InitPlan */
	if (estate->es_select_into)
	{
		*hasoids = estate->es_into_oids;
		return true;
	}

	/* INSERT/UPDATE: consult the current result relation, if any */
	relinfo = estate->es_result_relation_info;
	if (relinfo != NULL && relinfo->ri_RelationDesc != NULL)
	{
		*hasoids = relinfo->ri_RelationDesc->rd_rel->relhasoids;
		return true;
	}

	/* No forcing context: caller chooses */
	return false;
}

986
/* ----------------------------------------------------------------
 *		ExecEndPlan
 *
 *		Cleans up the query plan -- closes files and frees up storage
 *
 * NOTE: we are no longer very worried about freeing storage per se
 * in this code; FreeExecutorState should be guaranteed to release all
 * memory that needs to be released.  What we are worried about doing
 * is closing relations and dropping buffer pins.  Thus, for example,
 * tuple tables must be cleared or dropped to ensure pins are released.
 *
 * The shutdown order below matters: PlanQual state first, then the plan
 * node tree, then the tuple table (releasing buffer pins), and finally
 * the relations — whose locks are intentionally retained (NoLock) until
 * transaction end.
 * ----------------------------------------------------------------
 */
void
ExecEndPlan(PlanState *planstate, EState *estate)
{
	ResultRelInfo *resultRelInfo;
	int			i;
	ListCell   *l;

	/*
	 * shut down any PlanQual processing we were doing
	 */
	if (estate->es_evalPlanQual != NULL)
		EndEvalPlanQual(estate);

	/*
	 * shut down the node-type-specific query processing
	 */
	ExecEndNode(planstate);

	/*
	 * destroy the executor "tuple" table.
	 */
	ExecDropTupleTable(estate->es_tupleTable, true);
	estate->es_tupleTable = NULL;

	/*
	 * close the result relation(s) if any, but hold locks until xact commit.
	 */
	resultRelInfo = estate->es_result_relations;
	for (i = estate->es_num_result_relations; i > 0; i--)
	{
		/* Close indices and then the relation itself */
		ExecCloseIndices(resultRelInfo);
		heap_close(resultRelInfo->ri_RelationDesc, NoLock);
		resultRelInfo++;
	}

	/*
	 * close the "into" relation if necessary, again keeping lock
	 */
	if (estate->es_into_relation_descriptor != NULL)
	{
		/*
		 * If we skipped using WAL, and it's not a temp relation, we must
		 * force the relation down to disk before it's safe to commit the
		 * transaction.  This requires forcing out any dirty buffers and then
		 * doing a forced fsync.
		 */
		if (!estate->es_into_relation_use_wal &&
			!estate->es_into_relation_descriptor->rd_istemp)
		{
			FlushRelationBuffers(estate->es_into_relation_descriptor);
			/* FlushRelationBuffers will have opened rd_smgr */
			smgrimmedsync(estate->es_into_relation_descriptor->rd_smgr);
		}

		heap_close(estate->es_into_relation_descriptor, NoLock);
	}

	/*
	 * close any relations selected FOR UPDATE/FOR SHARE, again keeping locks
	 */
	foreach(l, estate->es_rowMarks)
	{
		ExecRowMark *erm = lfirst(l);

		heap_close(erm->relation, NoLock);
	}
}

/* ----------------------------------------------------------------
 *		ExecutePlan
 *
 *		processes the query plan to retrieve 'numberTuples' tuples in the
 *		direction specified.
 *
 *		Retrieves all tuples if numberTuples is 0
 *
 *		result is either a slot containing the last tuple in the case
 *		of a SELECT or NULL otherwise.
 *
 * Note: the ctid attribute is a 'junk' attribute that is removed before the
 * user can see it
 *
 * estate: working state for this execution
 * planstate: root of the plan-node state tree to pull tuples from
 * operation: CMD_SELECT/CMD_INSERT/CMD_UPDATE/CMD_DELETE
 * numberTuples: stop after this many tuples (0 = no limit)
 * direction: scan direction, stored into estate->es_direction
 * dest: receiver for SELECT output tuples
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
ExecutePlan(EState *estate,
			PlanState *planstate,
			CmdType operation,
			long numberTuples,
			ScanDirection direction,
			DestReceiver *dest)
{
	JunkFilter *junkfilter;
	TupleTableSlot *slot;
	ItemPointer tupleid = NULL;
	ItemPointerData tuple_ctid;
	long		current_tuple_count;
	TupleTableSlot *result;

	/*
	 * initialize local variables
	 */
	slot = NULL;
	current_tuple_count = 0;
	result = NULL;

	/*
	 * Set the direction.
	 */
	estate->es_direction = direction;

	/*
	 * Process BEFORE EACH STATEMENT triggers
	 */
	switch (operation)
	{
		case CMD_UPDATE:
			ExecBSUpdateTriggers(estate, estate->es_result_relation_info);
			break;
		case CMD_DELETE:
			ExecBSDeleteTriggers(estate, estate->es_result_relation_info);
			break;
		case CMD_INSERT:
			ExecBSInsertTriggers(estate, estate->es_result_relation_info);
			break;
		default:
			/* do nothing */
			break;
	}

	/*
	 * Loop until we've processed the proper number of tuples from the plan.
	 */

	for (;;)
	{
		/* Reset the per-output-tuple exprcontext */
		ResetPerTupleExprContext(estate);

		/*
		 * Execute the plan and obtain a tuple
		 */
		/* lnext: jump target to fetch the next candidate tuple (used when a
		 * row-mark check decides the current tuple must be skipped) */
lnext:	;
		if (estate->es_useEvalPlan)
		{
			slot = EvalPlanQualNext(estate);
			if (TupIsNull(slot))
				slot = ExecProcNode(planstate);
		}
		else
			slot = ExecProcNode(planstate);

		/*
		 * if the tuple is null, then we assume there is nothing more to
		 * process so we just return null...
		 */
		if (TupIsNull(slot))
		{
			result = NULL;
			break;
		}

		/*
		 * if we have a junk filter, then project a new tuple with the junk
		 * removed.
		 *
		 * Store this new "clean" tuple in the junkfilter's resultSlot.
		 * (Formerly, we stored it back over the "dirty" tuple, which is WRONG
		 * because that tuple slot has the wrong descriptor.)
		 *
		 * Also, extract all the junk information we need.
		 */
		if ((junkfilter = estate->es_junkFilter) != NULL)
		{
			Datum		datum;
			bool		isNull;

			/*
			 * extract the 'ctid' junk attribute.
			 */
			if (operation == CMD_UPDATE || operation == CMD_DELETE)
			{
				if (!ExecGetJunkAttribute(junkfilter,
										  slot,
										  "ctid",
										  &datum,
										  &isNull))
					elog(ERROR, "could not find junk ctid column");

				/* shouldn't ever get a null result... */
				if (isNull)
					elog(ERROR, "ctid is NULL");

				tupleid = (ItemPointer) DatumGetPointer(datum);
				tuple_ctid = *tupleid;	/* make sure we don't free the ctid!! */
				tupleid = &tuple_ctid;
			}

			/*
			 * Process any FOR UPDATE or FOR SHARE locking requested.
			 */
			else if (estate->es_rowMarks != NIL)
			{
				ListCell   *l;

				/* lmark: re-run all row-mark locking after EvalPlanQual
				 * substitutes an updated tuple version */
		lmark:	;
				foreach(l, estate->es_rowMarks)
				{
					ExecRowMark *erm = lfirst(l);
					HeapTupleData tuple;
					Buffer		buffer;
					ItemPointerData update_ctid;
					TransactionId update_xmax;
					TupleTableSlot *newSlot;
					LockTupleMode lockmode;
					HTSU_Result test;

					if (!ExecGetJunkAttribute(junkfilter,
											  slot,
											  erm->resname,
											  &datum,
											  &isNull))
						elog(ERROR, "could not find junk \"%s\" column",
							 erm->resname);

					/* shouldn't ever get a null result... */
					if (isNull)
						elog(ERROR, "\"%s\" is NULL", erm->resname);

					tuple.t_self = *((ItemPointer) DatumGetPointer(datum));

					/* FOR UPDATE takes an exclusive tuple lock, FOR SHARE a
					 * shared one */
					if (erm->forUpdate)
						lockmode = LockTupleExclusive;
					else
						lockmode = LockTupleShared;

					test = heap_lock_tuple(erm->relation, &tuple, &buffer,
										   &update_ctid, &update_xmax,
										   estate->es_snapshot->curcid,
										   lockmode, erm->noWait);
					ReleaseBuffer(buffer);
					switch (test)
					{
						case HeapTupleSelfUpdated:
							/* treat it as deleted; do not process */
							goto lnext;

						case HeapTupleMayBeUpdated:
							break;

						case HeapTupleUpdated:
							if (IsXactIsoLevelSerializable)
								ereport(ERROR,
								 (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
								  errmsg("could not serialize access due to concurrent update")));
							if (!ItemPointerEquals(&update_ctid,
												   &tuple.t_self))
							{
								/* updated, so look at updated version */
								newSlot = EvalPlanQual(estate,
													   erm->rti,
													   &update_ctid,
													   update_xmax,
													   estate->es_snapshot->curcid);
								if (!TupIsNull(newSlot))
								{
									slot = newSlot;
									estate->es_useEvalPlan = true;
									goto lmark;
								}
							}

							/*
							 * if tuple was deleted or PlanQual failed for
							 * updated tuple - we must not return this tuple!
							 */
							goto lnext;

						default:
							elog(ERROR, "unrecognized heap_lock_tuple status: %u",
								 test);
							return NULL;
					}
				}
			}

			/*
			 * Finally create a new "clean" tuple with all junk attributes
			 * removed
			 */
			slot = ExecFilterJunk(junkfilter, slot);
		}

		/*
		 * now that we have a tuple, do the appropriate thing with it.. either
		 * return it to the user, add it to a relation someplace, delete it
		 * from a relation, or modify some of its attributes.
		 */
		switch (operation)
		{
			case CMD_SELECT:
				ExecSelect(slot,	/* slot containing tuple */
						   dest,	/* destination's tuple-receiver obj */
						   estate);
				result = slot;
				break;

			case CMD_INSERT:
				ExecInsert(slot, tupleid, estate);
				result = NULL;
				break;

			case CMD_DELETE:
				ExecDelete(slot, tupleid, estate);
				result = NULL;
				break;

			case CMD_UPDATE:
				ExecUpdate(slot, tupleid, estate);
				result = NULL;
				break;

			default:
				elog(ERROR, "unrecognized operation code: %d",
					 (int) operation);
				result = NULL;
				break;
		}

		/*
		 * check our tuple count.. if we've processed the proper number then
		 * quit, else loop again and process more tuples.  Zero numberTuples
		 * means no limit.
		 */
		current_tuple_count++;
		if (numberTuples && numberTuples == current_tuple_count)
			break;
	}

	/*
	 * Process AFTER EACH STATEMENT triggers
	 */
	switch (operation)
	{
		case CMD_UPDATE:
			ExecASUpdateTriggers(estate, estate->es_result_relation_info);
			break;
		case CMD_DELETE:
			ExecASDeleteTriggers(estate, estate->es_result_relation_info);
			break;
		case CMD_INSERT:
			ExecASInsertTriggers(estate, estate->es_result_relation_info);
			break;
		default:
			/* do nothing */
			break;
	}

	/*
	 * here, result is either a slot containing a tuple in the case of a
	 * SELECT or NULL otherwise.
	 */
	return result;
}

/* ----------------------------------------------------------------
 *		ExecSelect
 *
 *		SELECTs are easy.. we just pass the tuple to the appropriate
 *		print function.  The only complexity is when we do a
 *		"SELECT INTO", in which case we insert the tuple into
 *		the appropriate relation (note: this is a newly created relation
 *		so we don't need to worry about indices or locks.)
 *
 * slot: the tuple to emit (and, for SELECT INTO, to store)
 * dest: receiver that consumes the output tuple
 * estate: execution state; es_processed is incremented here
 * ----------------------------------------------------------------
 */
static void
ExecSelect(TupleTableSlot *slot,
		   DestReceiver *dest,
		   EState *estate)
{
	/*
	 * insert the tuple into the "into relation"
	 *
	 * XXX this probably ought to be replaced by a separate destination
	 */
	if (estate->es_into_relation_descriptor != NULL)
	{
		HeapTuple	tuple;

		/* materialize a palloc'd copy we can hand to heap_insert */
		tuple = ExecCopySlotTuple(slot);
		heap_insert(estate->es_into_relation_descriptor, tuple,
					estate->es_snapshot->curcid,
					estate->es_into_relation_use_wal,
					false);		/* never any point in using FSM */
		/* we know there are no indexes to update */
		heap_freetuple(tuple);
		IncrAppended();
	}

	/*
	 * send the tuple to the destination
	 */
	(*dest->receiveSlot) (slot, dest);
	IncrRetrieved();
	(estate->es_processed)++;
}

/* ----------------------------------------------------------------
 *		ExecInsert
 *
 *		INSERTs are trickier.. we have to insert the tuple into
 *		the base relation and insert appropriate tuples into the
 *		index relations.
 *
 * Order of operations: BEFORE ROW triggers (which may replace or
 * suppress the tuple), constraint checks, heap_insert, index-entry
 * insertion, then AFTER ROW triggers.
 *
 * slot: tuple to insert
 * tupleid: unused for INSERT (kept for symmetry with Delete/Update)
 * estate: execution state; es_processed and es_lastoid updated here
 * ----------------------------------------------------------------
 */
static void
ExecInsert(TupleTableSlot *slot,
		   ItemPointer tupleid,
		   EState *estate)
{
	HeapTuple	tuple;
	ResultRelInfo *resultRelInfo;
	Relation	resultRelationDesc;
	Oid			newId;

	/*
	 * get the heap tuple out of the tuple table slot, making sure we have a
	 * writable copy
	 */
	tuple = ExecMaterializeSlot(slot);

	/*
	 * get information on the (current) result relation
	 */
	resultRelInfo = estate->es_result_relation_info;
	resultRelationDesc = resultRelInfo->ri_RelationDesc;

	/* BEFORE ROW INSERT Triggers */
	if (resultRelInfo->ri_TrigDesc &&
		resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_INSERT] > 0)
	{
		HeapTuple	newtuple;

		newtuple = ExecBRInsertTriggers(estate, resultRelInfo, tuple);

		if (newtuple == NULL)	/* "do nothing" */
			return;

		if (newtuple != tuple)	/* modified by Trigger(s) */
		{
			/*
			 * Put the modified tuple into a slot for convenience of routines
			 * below.  We assume the tuple was allocated in per-tuple memory
			 * context, and therefore will go away by itself. The tuple table
			 * slot should not try to clear it.
			 */
			TupleTableSlot *newslot = estate->es_trig_tuple_slot;

			if (newslot->tts_tupleDescriptor != slot->tts_tupleDescriptor)
				ExecSetSlotDescriptor(newslot, slot->tts_tupleDescriptor);
			ExecStoreTuple(newtuple, newslot, InvalidBuffer, false);
			slot = newslot;
			tuple = newtuple;
		}
	}

	/*
	 * Check the constraints of the tuple
	 */
	if (resultRelationDesc->rd_att->constr)
		ExecConstraints(resultRelInfo, slot, estate);

	/*
	 * insert the tuple
	 *
	 * Note: heap_insert returns the tid (location) of the new tuple in the
	 * t_self field.
	 */
	newId = heap_insert(resultRelationDesc, tuple,
						estate->es_snapshot->curcid,
						true, true);

	IncrAppended();
	(estate->es_processed)++;
	estate->es_lastoid = newId;
	setLastTid(&(tuple->t_self));

	/*
	 * insert index entries for tuple
	 */
	if (resultRelInfo->ri_NumIndices > 0)
		ExecInsertIndexTuples(slot, &(tuple->t_self), estate, false);

	/* AFTER ROW INSERT Triggers */
	ExecARInsertTriggers(estate, resultRelInfo, tuple);
}

/* ----------------------------------------------------------------
 *		ExecDelete
 *
 *		DELETE is like UPDATE, except that we delete the tuple and no
 *		index modifications are needed
 *
 * slot: current plan output tuple (not used for the deletion itself)
 * tupleid: ctid of the row to delete; may be advanced to a newer row
 *			version if a concurrent update is detected (EvalPlanQual)
 * estate: execution state; es_processed incremented on success
 * ----------------------------------------------------------------
 */
static void
ExecDelete(TupleTableSlot *slot,
		   ItemPointer tupleid,
		   EState *estate)
{
	ResultRelInfo *resultRelInfo;
	Relation	resultRelationDesc;
	HTSU_Result result;
	ItemPointerData update_ctid;
	TransactionId update_xmax;

	/*
	 * get information on the (current) result relation
	 */
	resultRelInfo = estate->es_result_relation_info;
	resultRelationDesc = resultRelInfo->ri_RelationDesc;

	/* BEFORE ROW DELETE Triggers */
	if (resultRelInfo->ri_TrigDesc &&
		resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_DELETE] > 0)
	{
		bool		dodelete;

		dodelete = ExecBRDeleteTriggers(estate, resultRelInfo, tupleid,
										estate->es_snapshot->curcid);

		if (!dodelete)			/* "do nothing" */
			return;
	}

	/*
	 * delete the tuple
	 *
	 * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check that
	 * the row to be deleted is visible to that snapshot, and throw a can't-
	 * serialize error if not.	This is a special-case behavior needed for
	 * referential integrity updates in serializable transactions.
	 */
	/* ldelete: retried after EvalPlanQual accepts a newer row version */
ldelete:;
	result = heap_delete(resultRelationDesc, tupleid,
						 &update_ctid, &update_xmax,
						 estate->es_snapshot->curcid,
						 estate->es_crosscheck_snapshot,
						 true /* wait for commit */ );
	switch (result)
	{
		case HeapTupleSelfUpdated:
			/* already deleted by self; nothing to do */
			return;

		case HeapTupleMayBeUpdated:
			break;

		case HeapTupleUpdated:
			if (IsXactIsoLevelSerializable)
				ereport(ERROR,
						(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
						 errmsg("could not serialize access due to concurrent update")));
			else if (!ItemPointerEquals(tupleid, &update_ctid))
			{
				TupleTableSlot *epqslot;

				/* row was updated concurrently: recheck quals against the
				 * newest version, and if it still qualifies, delete that */
				epqslot = EvalPlanQual(estate,
									   resultRelInfo->ri_RangeTableIndex,
									   &update_ctid,
									   update_xmax,
									   estate->es_snapshot->curcid);
				if (!TupIsNull(epqslot))
				{
					*tupleid = update_ctid;
					goto ldelete;
				}
			}
			/* tuple already deleted; nothing to do */
			return;

		default:
			elog(ERROR, "unrecognized heap_delete status: %u", result);
			return;
	}

	IncrDeleted();
	(estate->es_processed)++;

	/*
	 * Note: Normally one would think that we have to delete index tuples
	 * associated with the heap tuple now...
	 *
	 * ... but in POSTGRES, we have no need to do this because VACUUM will
	 * take care of it later.  We can't delete index tuples immediately
	 * anyway, since the tuple is still visible to other transactions.
	 */

	/* AFTER ROW DELETE Triggers */
	ExecARDeleteTriggers(estate, resultRelInfo, tupleid);
}

/* ----------------------------------------------------------------
 *		ExecUpdate
 *
 *		note: we can't run UPDATE queries with transactions
 *		off because UPDATEs are actually INSERTs and our
 *		scan will mistakenly loop forever, updating the tuple
 *		it just inserted..	This should be fixed but until it
 *		is, we don't want to get stuck in an infinite loop
 *		which corrupts your database..
 *
 * slot: the new (replacement) tuple
 * tupleid: ctid of the row being replaced; may be advanced to a newer
 *			row version if a concurrent update is detected (EvalPlanQual)
 * estate: execution state; es_processed incremented on success
 * ----------------------------------------------------------------
 */
static void
ExecUpdate(TupleTableSlot *slot,
		   ItemPointer tupleid,
		   EState *estate)
{
	HeapTuple	tuple;
	ResultRelInfo *resultRelInfo;
	Relation	resultRelationDesc;
	HTSU_Result result;
	ItemPointerData update_ctid;
	TransactionId update_xmax;

	/*
	 * abort the operation if not running transactions
	 */
	if (IsBootstrapProcessingMode())
		elog(ERROR, "cannot UPDATE during bootstrap");

	/*
	 * get the heap tuple out of the tuple table slot, making sure we have a
	 * writable copy
	 */
	tuple = ExecMaterializeSlot(slot);

	/*
	 * get information on the (current) result relation
	 */
	resultRelInfo = estate->es_result_relation_info;
	resultRelationDesc = resultRelInfo->ri_RelationDesc;

	/* BEFORE ROW UPDATE Triggers */
	if (resultRelInfo->ri_TrigDesc &&
		resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_UPDATE] > 0)
	{
		HeapTuple	newtuple;

		newtuple = ExecBRUpdateTriggers(estate, resultRelInfo,
										tupleid, tuple,
										estate->es_snapshot->curcid);

		if (newtuple == NULL)	/* "do nothing" */
			return;

		if (newtuple != tuple)	/* modified by Trigger(s) */
		{
			/*
			 * Put the modified tuple into a slot for convenience of routines
			 * below.  We assume the tuple was allocated in per-tuple memory
			 * context, and therefore will go away by itself. The tuple table
			 * slot should not try to clear it.
			 */
			TupleTableSlot *newslot = estate->es_trig_tuple_slot;

			if (newslot->tts_tupleDescriptor != slot->tts_tupleDescriptor)
				ExecSetSlotDescriptor(newslot, slot->tts_tupleDescriptor);
			ExecStoreTuple(newtuple, newslot, InvalidBuffer, false);
			slot = newslot;
			tuple = newtuple;
		}
	}

	/*
	 * Check the constraints of the tuple
	 *
	 * If we generate a new candidate tuple after EvalPlanQual testing, we
	 * must loop back here and recheck constraints.  (We don't need to redo
	 * triggers, however.  If there are any BEFORE triggers then trigger.c
	 * will have done heap_lock_tuple to lock the correct tuple, so there's no
	 * need to do them again.)
	 */
lreplace:;
	if (resultRelationDesc->rd_att->constr)
		ExecConstraints(resultRelInfo, slot, estate);

	/*
	 * replace the heap tuple
	 *
	 * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check that
	 * the row to be updated is visible to that snapshot, and throw a can't-
	 * serialize error if not.	This is a special-case behavior needed for
	 * referential integrity updates in serializable transactions.
	 */
	result = heap_update(resultRelationDesc, tupleid, tuple,
						 &update_ctid, &update_xmax,
						 estate->es_snapshot->curcid,
						 estate->es_crosscheck_snapshot,
						 true /* wait for commit */ );
	switch (result)
	{
		case HeapTupleSelfUpdated:
			/* already deleted by self; nothing to do */
			return;

		case HeapTupleMayBeUpdated:
			break;

		case HeapTupleUpdated:
			if (IsXactIsoLevelSerializable)
				ereport(ERROR,
						(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
						 errmsg("could not serialize access due to concurrent update")));
			else if (!ItemPointerEquals(tupleid, &update_ctid))
			{
				TupleTableSlot *epqslot;

				/* row was updated concurrently: recheck quals against the
				 * newest version, and if it still qualifies, update that */
				epqslot = EvalPlanQual(estate,
									   resultRelInfo->ri_RangeTableIndex,
									   &update_ctid,
									   update_xmax,
									   estate->es_snapshot->curcid);
				if (!TupIsNull(epqslot))
				{
					*tupleid = update_ctid;
					slot = ExecFilterJunk(estate->es_junkFilter, epqslot);
					tuple = ExecMaterializeSlot(slot);
					goto lreplace;
				}
			}
			/* tuple already deleted; nothing to do */
			return;

		default:
			elog(ERROR, "unrecognized heap_update status: %u", result);
			return;
	}

	IncrReplaced();
	(estate->es_processed)++;

	/*
	 * Note: instead of having to update the old index tuples associated with
	 * the heap tuple, all we do is form and insert new index tuples. This is
	 * because UPDATEs are actually DELETEs and INSERTs, and index tuple
	 * deletion is done later by VACUUM (see notes in ExecDelete).	All we do
	 * here is insert new index tuples.  -cim 9/27/89
	 */

	/*
	 * insert index entries for tuple
	 *
	 * Note: heap_update returns the tid (location) of the new tuple in the
	 * t_self field.
	 */
	if (resultRelInfo->ri_NumIndices > 0)
		ExecInsertIndexTuples(slot, &(tuple->t_self), estate, false);

	/* AFTER ROW UPDATE Triggers */
	ExecARUpdateTriggers(estate, resultRelInfo, tupleid, tuple);
}
V
Vadim B. Mikheev 已提交
1759

1760
static const char *
1761 1762
ExecRelCheck(ResultRelInfo *resultRelInfo,
			 TupleTableSlot *slot, EState *estate)
V
Vadim B. Mikheev 已提交
1763
{
1764
	Relation	rel = resultRelInfo->ri_RelationDesc;
1765 1766
	int			ncheck = rel->rd_att->constr->num_check;
	ConstrCheck *check = rel->rd_att->constr->check;
1767
	ExprContext *econtext;
1768
	MemoryContext oldContext;
1769 1770
	List	   *qual;
	int			i;
1771

1772 1773
	/*
	 * If first time through for this result relation, build expression
B
Bruce Momjian 已提交
1774 1775
	 * nodetrees for rel's constraint expressions.  Keep them in the per-query
	 * memory context so they'll survive throughout the query.
1776 1777 1778 1779 1780 1781 1782 1783
	 */
	if (resultRelInfo->ri_ConstraintExprs == NULL)
	{
		oldContext = MemoryContextSwitchTo(estate->es_query_cxt);
		resultRelInfo->ri_ConstraintExprs =
			(List **) palloc(ncheck * sizeof(List *));
		for (i = 0; i < ncheck; i++)
		{
1784 1785
			/* ExecQual wants implicit-AND form */
			qual = make_ands_implicit(stringToNode(check[i].ccbin));
1786
			resultRelInfo->ri_ConstraintExprs[i] = (List *)
1787
				ExecPrepareExpr((Expr *) qual, estate);
1788 1789 1790 1791
		}
		MemoryContextSwitchTo(oldContext);
	}

1792
	/*
B
Bruce Momjian 已提交
1793 1794
	 * We will use the EState's per-tuple context for evaluating constraint
	 * expressions (creating it if it's not already there).
1795
	 */
1796
	econtext = GetPerTupleExprContext(estate);
1797

1798 1799 1800 1801
	/* Arrange for econtext's scan tuple to be the tuple under test */
	econtext->ecxt_scantuple = slot;

	/* And evaluate the constraints */
1802 1803
	for (i = 0; i < ncheck; i++)
	{
1804
		qual = resultRelInfo->ri_ConstraintExprs[i];
1805

1806 1807
		/*
		 * NOTE: SQL92 specifies that a NULL result from a constraint
1808 1809
		 * expression is not to be treated as a failure.  Therefore, tell
		 * ExecQual to return TRUE for NULL.
1810
		 */
1811
		if (!ExecQual(qual, econtext, true))
1812
			return check[i].ccname;
1813 1814
	}

1815
	/* NULL result means no error */
1816
	return NULL;
V
Vadim B. Mikheev 已提交
1817 1818
}

1819
void
1820
ExecConstraints(ResultRelInfo *resultRelInfo,
1821
				TupleTableSlot *slot, EState *estate)
V
Vadim B. Mikheev 已提交
1822
{
1823
	Relation	rel = resultRelInfo->ri_RelationDesc;
1824 1825 1826
	TupleConstr *constr = rel->rd_att->constr;

	Assert(constr);
1827

1828
	if (constr->has_not_null)
V
Vadim B. Mikheev 已提交
1829
	{
1830
		int			natts = rel->rd_att->natts;
1831
		int			attrChk;
1832

1833
		for (attrChk = 1; attrChk <= natts; attrChk++)
1834
		{
B
Bruce Momjian 已提交
1835
			if (rel->rd_att->attrs[attrChk - 1]->attnotnull &&
1836
				slot_attisnull(slot, attrChk))
1837 1838
				ereport(ERROR,
						(errcode(ERRCODE_NOT_NULL_VIOLATION),
1839
						 errmsg("null value in column \"%s\" violates not-null constraint",
B
Bruce Momjian 已提交
1840
						NameStr(rel->rd_att->attrs[attrChk - 1]->attname))));
1841 1842 1843
		}
	}

1844
	if (constr->num_check > 0)
1845
	{
B
Bruce Momjian 已提交
1846
		const char *failed;
1847

1848
		if ((failed = ExecRelCheck(resultRelInfo, slot, estate)) != NULL)
1849 1850
			ereport(ERROR,
					(errcode(ERRCODE_CHECK_VIOLATION),
1851
					 errmsg("new row for relation \"%s\" violates check constraint \"%s\"",
1852
							RelationGetRelationName(rel), failed)));
1853
	}
V
Vadim B. Mikheev 已提交
1854
}
1855

1856 1857 1858 1859 1860
/*
 * Check a modified tuple to see if we want to process its updated version
 * under READ COMMITTED rules.
 *
 * See backend/executor/README for some info about how this works.
1861 1862 1863 1864 1865
 *
 *	estate - executor state data
 *	rti - rangetable index of table containing tuple
 *	*tid - t_ctid from the outdated tuple (ie, next updated version)
 *	priorXmax - t_xmax from the outdated tuple
1866
 *	curCid - command ID of current command of my transaction
1867 1868 1869 1870 1871 1872
 *
 * *tid is also an output parameter: it's modified to hold the TID of the
 * latest version of the tuple (note this may be changed even on failure)
 *
 * Returns a slot containing the new candidate update/delete tuple, or
 * NULL if we determine we shouldn't process the row.
1873
 */
B
Bruce Momjian 已提交
1874
TupleTableSlot *
1875
EvalPlanQual(EState *estate, Index rti,
1876
			 ItemPointer tid, TransactionId priorXmax, CommandId curCid)
1877
{
1878 1879
	evalPlanQual *epq;
	EState	   *epqstate;
B
Bruce Momjian 已提交
1880 1881
	Relation	relation;
	HeapTupleData tuple;
1882 1883
	HeapTuple	copyTuple = NULL;
	bool		endNode;
1884 1885 1886

	Assert(rti != 0);

1887 1888 1889 1890 1891 1892 1893 1894
	/*
	 * find relation containing target tuple
	 */
	if (estate->es_result_relation_info != NULL &&
		estate->es_result_relation_info->ri_RangeTableIndex == rti)
		relation = estate->es_result_relation_info->ri_RelationDesc;
	else
	{
1895
		ListCell   *l;
1896 1897

		relation = NULL;
1898
		foreach(l, estate->es_rowMarks)
1899
		{
1900
			if (((ExecRowMark *) lfirst(l))->rti == rti)
1901
			{
1902
				relation = ((ExecRowMark *) lfirst(l))->relation;
1903 1904 1905 1906
				break;
			}
		}
		if (relation == NULL)
1907
			elog(ERROR, "could not find RowMark for RT index %u", rti);
1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919
	}

	/*
	 * fetch tid tuple
	 *
	 * Loop here to deal with updated or busy tuples
	 */
	tuple.t_self = *tid;
	for (;;)
	{
		Buffer		buffer;

1920
		if (heap_fetch(relation, SnapshotDirty, &tuple, &buffer, true, NULL))
1921
		{
1922 1923
			/*
			 * If xmin isn't what we're expecting, the slot must have been
B
Bruce Momjian 已提交
1924 1925 1926
			 * recycled and reused for an unrelated tuple.	This implies that
			 * the latest version of the row was deleted, so we need do
			 * nothing.  (Should be safe to examine xmin without getting
1927 1928 1929 1930 1931 1932 1933 1934 1935
			 * buffer's content lock, since xmin never changes in an existing
			 * tuple.)
			 */
			if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
									 priorXmax))
			{
				ReleaseBuffer(buffer);
				return NULL;
			}
1936

1937
			/* otherwise xmin should not be dirty... */
1938
			if (TransactionIdIsValid(SnapshotDirty->xmin))
1939
				elog(ERROR, "t_xmin is uncommitted in tuple to be updated");
1940 1941

			/*
B
Bruce Momjian 已提交
1942 1943
			 * If tuple is being updated by other transaction then we have to
			 * wait for its commit/abort.
1944
			 */
1945
			if (TransactionIdIsValid(SnapshotDirty->xmax))
1946 1947
			{
				ReleaseBuffer(buffer);
1948 1949
				XactLockTableWait(SnapshotDirty->xmax);
				continue;		/* loop back to repeat heap_fetch */
1950 1951
			}

1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969
			/*
			 * If tuple was inserted by our own transaction, we have to check
			 * cmin against curCid: cmin >= curCid means our command cannot
			 * see the tuple, so we should ignore it.  Without this we are
			 * open to the "Halloween problem" of indefinitely re-updating
			 * the same tuple.  (We need not check cmax because
			 * HeapTupleSatisfiesDirty will consider a tuple deleted by
			 * our transaction dead, regardless of cmax.)  We just checked
			 * that priorXmax == xmin, so we can test that variable instead
			 * of doing HeapTupleHeaderGetXmin again.
			 */
			if (TransactionIdIsCurrentTransactionId(priorXmax) &&
				HeapTupleHeaderGetCmin(tuple.t_data) >= curCid)
			{
				ReleaseBuffer(buffer);
				return NULL;
			}

1970 1971 1972 1973 1974 1975 1976 1977 1978
			/*
			 * We got tuple - now copy it for use by recheck query.
			 */
			copyTuple = heap_copytuple(&tuple);
			ReleaseBuffer(buffer);
			break;
		}

		/*
B
Bruce Momjian 已提交
1979 1980
		 * If the referenced slot was actually empty, the latest version of
		 * the row must have been deleted, so we need do nothing.
1981
		 */
1982
		if (tuple.t_data == NULL)
1983
		{
1984 1985
			ReleaseBuffer(buffer);
			return NULL;
1986 1987 1988
		}

		/*
1989
		 * As above, if xmin isn't what we're expecting, do nothing.
1990
		 */
1991 1992 1993 1994 1995 1996 1997 1998 1999
		if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
								 priorXmax))
		{
			ReleaseBuffer(buffer);
			return NULL;
		}

		/*
		 * If we get here, the tuple was found but failed SnapshotDirty.
B
Bruce Momjian 已提交
2000 2001 2002 2003 2004 2005
		 * Assuming the xmin is either a committed xact or our own xact (as it
		 * certainly should be if we're trying to modify the tuple), this must
		 * mean that the row was updated or deleted by either a committed xact
		 * or our own xact.  If it was deleted, we can ignore it; if it was
		 * updated then chain up to the next version and repeat the whole
		 * test.
2006
		 *
B
Bruce Momjian 已提交
2007 2008
		 * As above, it should be safe to examine xmax and t_ctid without the
		 * buffer content lock, because they can't be changing.
2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022
		 */
		if (ItemPointerEquals(&tuple.t_self, &tuple.t_data->t_ctid))
		{
			/* deleted, so forget about it */
			ReleaseBuffer(buffer);
			return NULL;
		}

		/* updated, so look at the updated row */
		tuple.t_self = tuple.t_data->t_ctid;
		/* updated row should have xmin matching this xmax */
		priorXmax = HeapTupleHeaderGetXmax(tuple.t_data);
		ReleaseBuffer(buffer);
		/* loop back to fetch next in chain */
2023 2024 2025
	}

	/*
B
Bruce Momjian 已提交
2026 2027
	 * For UPDATE/DELETE we have to return tid of actual row we're executing
	 * PQ for.
2028 2029 2030 2031
	 */
	*tid = tuple.t_self;

	/*
2032
	 * Need to run a recheck subquery.	Find or create a PQ stack entry.
2033
	 */
2034
	epq = estate->es_evalPlanQual;
2035 2036
	endNode = true;

2037 2038
	if (epq != NULL && epq->rti == 0)
	{
2039
		/* Top PQ stack entry is idle, so re-use it */
2040
		Assert(!(estate->es_useEvalPlan) && epq->next == NULL);
2041 2042 2043 2044 2045
		epq->rti = rti;
		endNode = false;
	}

	/*
B
Bruce Momjian 已提交
2046 2047 2048 2049
	 * If this is request for another RTE - Ra, - then we have to check wasn't
	 * PlanQual requested for Ra already and if so then Ra' row was updated
	 * again and we have to re-start old execution for Ra and forget all what
	 * we done after Ra was suspended. Cool? -:))
2050
	 */
B
Bruce Momjian 已提交
2051
	if (epq != NULL && epq->rti != rti &&
2052
		epq->estate->es_evTuple[rti - 1] != NULL)
2053 2054 2055
	{
		do
		{
2056 2057
			evalPlanQual *oldepq;

2058
			/* stop execution */
2059 2060 2061 2062
			EvalPlanQualStop(epq);
			/* pop previous PlanQual from the stack */
			oldepq = epq->next;
			Assert(oldepq && oldepq->rti != 0);
2063 2064 2065
			/* push current PQ to freePQ stack */
			oldepq->free = epq;
			epq = oldepq;
2066
			estate->es_evalPlanQual = epq;
2067 2068 2069
		} while (epq->rti != rti);
	}

B
Bruce Momjian 已提交
2070
	/*
B
Bruce Momjian 已提交
2071 2072
	 * If we are requested for another RTE then we have to suspend execution
	 * of current PlanQual and start execution for new one.
2073 2074 2075 2076
	 */
	if (epq == NULL || epq->rti != rti)
	{
		/* try to reuse plan used previously */
B
Bruce Momjian 已提交
2077
		evalPlanQual *newepq = (epq != NULL) ? epq->free : NULL;
2078

2079
		if (newepq == NULL)		/* first call or freePQ stack is empty */
2080
		{
2081
			newepq = (evalPlanQual *) palloc0(sizeof(evalPlanQual));
2082
			newepq->free = NULL;
2083 2084
			newepq->estate = NULL;
			newepq->planstate = NULL;
2085 2086
		}
		else
2087
		{
2088 2089 2090
			/* recycle previously used PlanQual */
			Assert(newepq->estate == NULL);
			epq->free = NULL;
2091
		}
2092
		/* push current PQ to the stack */
2093
		newepq->next = epq;
2094
		epq = newepq;
2095
		estate->es_evalPlanQual = epq;
2096 2097 2098 2099
		epq->rti = rti;
		endNode = false;
	}

2100
	Assert(epq->rti == rti);
2101 2102

	/*
B
Bruce Momjian 已提交
2103 2104 2105 2106 2107 2108
	 * Ok - we're requested for the same RTE.  Unfortunately we still have to
	 * end and restart execution of the plan, because ExecReScan wouldn't
	 * ensure that upper plan nodes would reset themselves.  We could make
	 * that work if insertion of the target tuple were integrated with the
	 * Param mechanism somehow, so that the upper plan nodes know that their
	 * children's outputs have changed.
2109
	 *
B
Bruce Momjian 已提交
2110 2111
	 * Note that the stack of free evalPlanQual nodes is quite useless at the
	 * moment, since it only saves us from pallocing/releasing the
B
Bruce Momjian 已提交
2112 2113
	 * evalPlanQual nodes themselves.  But it will be useful once we implement
	 * ReScan instead of end/restart for re-using PlanQual nodes.
2114 2115
	 */
	if (endNode)
2116
	{
2117
		/* stop execution */
2118
		EvalPlanQualStop(epq);
2119
	}
2120

2121 2122 2123
	/*
	 * Initialize new recheck query.
	 *
B
Bruce Momjian 已提交
2124 2125
	 * Note: if we were re-using PlanQual plans via ExecReScan, we'd need to
	 * instead copy down changeable state from the top plan (including
B
Bruce Momjian 已提交
2126 2127
	 * es_result_relation_info, es_junkFilter) and reset locally changeable
	 * state in the epq (including es_param_exec_vals, es_evTupleNull).
2128 2129 2130
	 */
	EvalPlanQualStart(epq, estate, epq->next);

2131
	/*
B
Bruce Momjian 已提交
2132 2133
	 * free old RTE' tuple, if any, and store target tuple where relation's
	 * scan node will see it
2134
	 */
2135
	epqstate = epq->estate;
2136 2137 2138
	if (epqstate->es_evTuple[rti - 1] != NULL)
		heap_freetuple(epqstate->es_evTuple[rti - 1]);
	epqstate->es_evTuple[rti - 1] = copyTuple;
2139

2140
	return EvalPlanQualNext(estate);
2141 2142
}

B
Bruce Momjian 已提交
2143
static TupleTableSlot *
2144 2145
EvalPlanQualNext(EState *estate)
{
2146 2147
	evalPlanQual *epq = estate->es_evalPlanQual;
	MemoryContext oldcontext;
B
Bruce Momjian 已提交
2148
	TupleTableSlot *slot;
2149 2150 2151 2152

	Assert(epq->rti != 0);

lpqnext:;
2153
	oldcontext = MemoryContextSwitchTo(epq->estate->es_query_cxt);
2154
	slot = ExecProcNode(epq->planstate);
2155
	MemoryContextSwitchTo(oldcontext);
2156 2157 2158 2159 2160 2161

	/*
	 * No more tuples for this PQ. Continue previous one.
	 */
	if (TupIsNull(slot))
	{
2162 2163
		evalPlanQual *oldepq;

2164
		/* stop execution */
2165
		EvalPlanQualStop(epq);
2166
		/* pop old PQ from the stack */
2167 2168
		oldepq = epq->next;
		if (oldepq == NULL)
2169
		{
2170 2171 2172 2173
			/* this is the first (oldest) PQ - mark as free */
			epq->rti = 0;
			estate->es_useEvalPlan = false;
			/* and continue Query execution */
2174
			return NULL;
2175 2176 2177 2178 2179
		}
		Assert(oldepq->rti != 0);
		/* push current PQ to freePQ stack */
		oldepq->free = epq;
		epq = oldepq;
2180
		estate->es_evalPlanQual = epq;
2181 2182 2183
		goto lpqnext;
	}

2184
	return slot;
2185
}
2186 2187 2188 2189

/*
 * Shut down all active levels of the PlanQual stack and mark the top
 * entry free.  No-op if the stack is already idle (rti == 0).
 */
static void
EndEvalPlanQual(EState *estate)
{
	evalPlanQual *epq = estate->es_evalPlanQual;

	if (epq->rti == 0)			/* plans already shutdowned */
	{
		Assert(epq->next == NULL);
		return;
	}

	for (;;)
	{
		evalPlanQual *oldepq;

		/* stop execution */
		EvalPlanQualStop(epq);
		/* pop old PQ from the stack */
		oldepq = epq->next;
		if (oldepq == NULL)
		{
			/* this is the first (oldest) PQ - mark as free */
			epq->rti = 0;
			estate->es_useEvalPlan = false;
			break;
		}
		Assert(oldepq->rti != 0);
		/* push current PQ to freePQ stack */
		oldepq->free = epq;
		epq = oldepq;
		estate->es_evalPlanQual = epq;
	}
}

/*
 * Start execution of one level of PlanQual.
 *
 * This is a cut-down version of ExecutorStart(): we copy some state from
 * the top-level estate rather than initializing it fresh.
 */
static void
EvalPlanQualStart(evalPlanQual *epq, EState *estate, evalPlanQual *priorepq)
{
	EState	   *epqstate;
	int			rtsize;
	MemoryContext oldcontext;

	rtsize = list_length(estate->es_range_table);

	epq->estate = epqstate = CreateExecutorState();

	oldcontext = MemoryContextSwitchTo(epqstate->es_query_cxt);

	/*
	 * The epqstates share the top query's copy of unchanging state such as
	 * the snapshot, rangetable, result-rel info, and external Param info.
	 * They need their own copies of local state, including a tuple table,
	 * es_param_exec_vals, etc.
	 */
	epqstate->es_direction = ForwardScanDirection;
	epqstate->es_snapshot = estate->es_snapshot;
	epqstate->es_crosscheck_snapshot = estate->es_crosscheck_snapshot;
	epqstate->es_range_table = estate->es_range_table;
	epqstate->es_result_relations = estate->es_result_relations;
	epqstate->es_num_result_relations = estate->es_num_result_relations;
	epqstate->es_result_relation_info = estate->es_result_relation_info;
	epqstate->es_junkFilter = estate->es_junkFilter;
	epqstate->es_into_relation_descriptor = estate->es_into_relation_descriptor;
	epqstate->es_into_relation_use_wal = estate->es_into_relation_use_wal;
	epqstate->es_param_list_info = estate->es_param_list_info;
	if (estate->es_topPlan->nParamExec > 0)
		epqstate->es_param_exec_vals = (ParamExecData *)
			palloc0(estate->es_topPlan->nParamExec * sizeof(ParamExecData));
	epqstate->es_rowMarks = estate->es_rowMarks;
	epqstate->es_instrument = estate->es_instrument;
	epqstate->es_select_into = estate->es_select_into;
	epqstate->es_into_oids = estate->es_into_oids;
	epqstate->es_topPlan = estate->es_topPlan;

	/*
	 * Each epqstate must have its own es_evTupleNull state, but all the stack
	 * entries share es_evTuple state.	This allows sub-rechecks to inherit
	 * the value being examined by an outer recheck.
	 */
	epqstate->es_evTupleNull = (bool *) palloc0(rtsize * sizeof(bool));
	if (priorepq == NULL)
		/* first PQ stack entry */
		epqstate->es_evTuple = (HeapTuple *)
			palloc0(rtsize * sizeof(HeapTuple));
	else
		/* later stack entries share the same storage */
		epqstate->es_evTuple = priorepq->estate->es_evTuple;

	epqstate->es_tupleTable =
		ExecCreateTupleTable(estate->es_tupleTable->size);

	epq->planstate = ExecInitNode(estate->es_topPlan, epqstate, 0);

	MemoryContextSwitchTo(oldcontext);
}

/*
 * End execution of one level of PlanQual.
 *
 * This is a cut-down version of ExecutorEnd(); basically we want to do most
 * of the normal cleanup, but *not* close result relations (which we are
 * just sharing from the outer query).
 */
static void
EvalPlanQualStop(evalPlanQual *epq)
{
	EState	   *epqstate = epq->estate;
	MemoryContext oldcontext;

	oldcontext = MemoryContextSwitchTo(epqstate->es_query_cxt);

	ExecEndNode(epq->planstate);

	ExecDropTupleTable(epqstate->es_tupleTable, true);
	epqstate->es_tupleTable = NULL;

	/* release this level's copy of the target tuple, if any */
	if (epqstate->es_evTuple[epq->rti - 1] != NULL)
	{
		heap_freetuple(epqstate->es_evTuple[epq->rti - 1]);
		epqstate->es_evTuple[epq->rti - 1] = NULL;
	}

	MemoryContextSwitchTo(oldcontext);

	FreeExecutorState(epqstate);

	epq->estate = NULL;
	epq->planstate = NULL;
}