/*-------------------------------------------------------------------------
 *
 * execMain.c
 *	  top level executor interface routines
 *
 * INTERFACE ROUTINES
 *	ExecutorStart()
 *	ExecutorRun()
 *	ExecutorEnd()
 *
 *	The old ExecutorMain() has been replaced by ExecutorStart(),
 *	ExecutorRun() and ExecutorEnd()
 *
 *	These three procedures are the external interfaces to the executor.
 *	In each case, the query descriptor is required as an argument.
 *
 *	ExecutorStart() must be called at the beginning of execution of any
 *	query plan and ExecutorEnd() should always be called at the end of
 *	execution of a plan.
 *
 *	ExecutorRun accepts direction and count arguments that specify whether
 *	the plan is to be executed forwards, backwards, and for how many tuples.
 *
 * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/backend/executor/execMain.c,v 1.269 2006/03/05 15:58:25 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/heapam.h"
#include "access/xlog.h"
#include "catalog/heap.h"
#include "catalog/namespace.h"
#include "commands/tablecmds.h"
#include "commands/tablespace.h"
#include "commands/trigger.h"
#include "executor/execdebug.h"
#include "executor/execdefs.h"
#include "executor/instrument.h"
#include "miscadmin.h"
#include "optimizer/clauses.h"
#include "optimizer/var.h"
#include "parser/parsetree.h"
#include "storage/smgr.h"
#include "utils/acl.h"
#include "utils/guc.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
54

55

56 57 58 59 60 61 62 63 64
/*
 * evalPlanQual -- one recheck level used by the EvalPlanQual machinery
 * (re-evaluation of a plan against updated tuples; see EvalPlanQualStart/
 * EvalPlanQualStop/EvalPlanQualNext below).  Levels are kept on a stack
 * via "next" and recycled via "free".
 */
typedef struct evalPlanQual
{
	Index		rti;			/* rangetable index this level rechecks --
								 * presumably set when the level is pushed;
								 * confirm against EvalPlanQual callers */
	EState	   *estate;			/* private executor state for the recheck */
	PlanState  *planstate;		/* plan tree initialized for the recheck */
	struct evalPlanQual *next;	/* stack of active PlanQual plans */
	struct evalPlanQual *free;	/* list of free PlanQual plans */
} evalPlanQual;

65
/* decls for local routines only used within this module */
static void InitPlan(QueryDesc *queryDesc, int eflags);
static void initResultRelInfo(ResultRelInfo *resultRelInfo,
				  Index resultRelationIndex,
				  List *rangeTable,
				  CmdType operation,
				  bool doInstrument);
static TupleTableSlot *ExecutePlan(EState *estate, PlanState *planstate,
			CmdType operation,
			long numberTuples,
			ScanDirection direction,
			DestReceiver *dest);
static void ExecSelect(TupleTableSlot *slot,
		   DestReceiver *dest,
		   EState *estate);
static void ExecInsert(TupleTableSlot *slot, ItemPointer tupleid,
		   EState *estate);
static void ExecDelete(TupleTableSlot *slot, ItemPointer tupleid,
		   EState *estate);
static void ExecUpdate(TupleTableSlot *slot, ItemPointer tupleid,
		   EState *estate);
static TupleTableSlot *EvalPlanQualNext(EState *estate);
static void EndEvalPlanQual(EState *estate);
static void ExecCheckRTEPerms(RangeTblEntry *rte);
static void ExecCheckXactReadOnly(Query *parsetree);
static void EvalPlanQualStart(evalPlanQual *epq, EState *estate,
				  evalPlanQual *priorepq);
static void EvalPlanQualStop(evalPlanQual *epq);

/* end of local decls */

96

97
/* ----------------------------------------------------------------
 *		ExecutorStart
 *
 *		This routine must be called at the beginning of any execution of any
 *		query plan
 *
 * Takes a QueryDesc previously created by CreateQueryDesc (it's not real
 * clear why we bother to separate the two functions, but...).	The tupDesc
 * field of the QueryDesc is filled in to describe the tuples that will be
 * returned, and the internal fields (estate and planstate) are set up.
 *
 * eflags contains flag bits as described in executor.h.
 *
 * NB: the CurrentMemoryContext when this is called will become the parent
 * of the per-query context used for this Executor invocation.
 * ----------------------------------------------------------------
 */
void
ExecutorStart(QueryDesc *queryDesc, int eflags)
{
	EState	   *estate;
	MemoryContext oldcontext;

	/* sanity checks: queryDesc must not be started already */
	Assert(queryDesc != NULL);
	Assert(queryDesc->estate == NULL);

	/*
	 * If the transaction is read-only, we need to check if any writes are
	 * planned to non-temporary tables.  EXPLAIN is considered read-only.
	 * This must happen before any executor state is built.
	 */
	if (XactReadOnly && !(eflags & EXEC_FLAG_EXPLAIN_ONLY))
		ExecCheckXactReadOnly(queryDesc->parsetree);

	/*
	 * Build EState, switch into per-query memory context for startup.
	 * Everything allocated below (param arrays, plan state) lives in the
	 * per-query context and is released by ExecutorEnd.
	 */
	estate = CreateExecutorState();
	queryDesc->estate = estate;

	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	/*
	 * Fill in parameters, if any, from queryDesc
	 */
	estate->es_param_list_info = queryDesc->params;

	/* allocate zero-initialized slots for executor-internal (PARAM_EXEC) params */
	if (queryDesc->plantree->nParamExec > 0)
		estate->es_param_exec_vals = (ParamExecData *)
			palloc0(queryDesc->plantree->nParamExec * sizeof(ParamExecData));

	/*
	 * Copy other important information into the EState
	 */
	estate->es_snapshot = queryDesc->snapshot;
	estate->es_crosscheck_snapshot = queryDesc->crosscheck_snapshot;
	estate->es_instrument = queryDesc->doInstrument;

	/*
	 * Initialize the plan state tree (fills queryDesc->tupDesc and
	 * queryDesc->planstate as a side effect)
	 */
	InitPlan(queryDesc, eflags);

	MemoryContextSwitchTo(oldcontext);
}

/* ----------------------------------------------------------------
 *		ExecutorRun
 *
 *		This is the main routine of the executor module. It accepts
 *		the query descriptor from the traffic cop and executes the
 *		query plan.
 *
 *		ExecutorStart must have been called already.
 *
 *		If direction is NoMovementScanDirection then nothing is done
 *		except to start up/shut down the destination.  Otherwise,
 *		we retrieve up to 'count' tuples in the specified direction.
 *
 *		Note: count = 0 is interpreted as no portal limit, i.e., run to
 *		completion.
 *
 * ----------------------------------------------------------------
 */
TupleTableSlot *
ExecutorRun(QueryDesc *queryDesc,
			ScanDirection direction, long count)
{
	EState	   *estate;
	CmdType		operation;
	DestReceiver *dest;
	TupleTableSlot *result;
	MemoryContext oldcontext;

	/* sanity checks */
	Assert(queryDesc != NULL);

	estate = queryDesc->estate;

	Assert(estate != NULL);

	/*
	 * Switch into per-query memory context
	 */
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	/*
	 * extract information from the query descriptor and the query feature.
	 */
	operation = queryDesc->operation;
	dest = queryDesc->dest;

	/*
	 * startup tuple receiver; reset per-run counters first
	 */
	estate->es_processed = 0;
	estate->es_lastoid = InvalidOid;

	(*dest->rStartup) (dest, operation, queryDesc->tupDesc);

	/*
	 * run plan (skipped entirely for NoMovement, which only cycles the
	 * receiver)
	 */
	if (ScanDirectionIsNoMovement(direction))
		result = NULL;
	else
		result = ExecutePlan(estate,
							 queryDesc->planstate,
							 operation,
							 count,
							 direction,
							 dest);

	/*
	 * shutdown receiver
	 */
	(*dest->rShutdown) (dest);

	MemoryContextSwitchTo(oldcontext);

	return result;
}

/* ----------------------------------------------------------------
 *		ExecutorEnd
 *
 *		This routine must be called at the end of execution of any
 *		query plan
 * ----------------------------------------------------------------
 */
void
ExecutorEnd(QueryDesc *queryDesc)
{
	EState	   *estate;
	MemoryContext oldcontext;

	/* sanity checks */
	Assert(queryDesc != NULL);

	estate = queryDesc->estate;

	Assert(estate != NULL);

	/*
	 * Switch into per-query memory context to run ExecEndPlan
	 */
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	ExecEndPlan(queryDesc->planstate, estate);

	/*
	 * Must switch out of context before destroying it
	 */
	MemoryContextSwitchTo(oldcontext);

	/*
	 * Release EState and per-query memory context.  This should release
	 * everything the executor has allocated.
	 */
	FreeExecutorState(estate);

	/* Reset queryDesc fields that no longer point to anything */
	queryDesc->tupDesc = NULL;
	queryDesc->estate = NULL;
	queryDesc->planstate = NULL;
}
283

284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319
/* ----------------------------------------------------------------
 *		ExecutorRewind
 *
 *		This routine may be called on an open queryDesc to rewind it
 *		to the start.
 * ----------------------------------------------------------------
 */
void
ExecutorRewind(QueryDesc *queryDesc)
{
	EState	   *estate;
	MemoryContext oldcontext;

	/* sanity checks */
	Assert(queryDesc != NULL);

	estate = queryDesc->estate;

	Assert(estate != NULL);

	/* It's probably not sensible to rescan updating queries */
	Assert(queryDesc->operation == CMD_SELECT);

	/*
	 * Switch into per-query memory context
	 */
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	/*
	 * rescan plan from the top
	 */
	ExecReScan(queryDesc->planstate, NULL);

	MemoryContextSwitchTo(oldcontext);
}

320

321 322 323 324
/*
 * ExecCheckRTPerms
 *		Check access permissions for all relations listed in a range table.
 */
325
void
326
ExecCheckRTPerms(List *rangeTable)
327
{
328
	ListCell   *l;
329

330
	foreach(l, rangeTable)
331
	{
332
		RangeTblEntry *rte = lfirst(l);
333

334
		ExecCheckRTEPerms(rte);
335 336 337 338 339 340 341 342
	}
}

/*
 * ExecCheckRTEPerms
 *		Check access permissions for a single RTE.
 *
 * Raises an aclcheck error if the checking user lacks any of the
 * permission bits recorded in rte->requiredPerms.
 */
static void
ExecCheckRTEPerms(RangeTblEntry *rte)
{
	AclMode		requiredPerms;
	Oid			relOid;
	Oid			userid;

	/*
	 * Only plain-relation RTEs need to be checked here.  Subquery RTEs are
	 * checked by ExecInitSubqueryScan if the subquery is still a separate
	 * subquery --- if it's been pulled up into our query level then the RTEs
	 * are in our rangetable and will be checked here. Function RTEs are
	 * checked by init_fcache when the function is prepared for execution.
	 * Join and special RTEs need no checks.
	 */
	if (rte->rtekind != RTE_RELATION)
		return;

	/*
	 * No work if requiredPerms is empty.
	 */
	requiredPerms = rte->requiredPerms;
	if (requiredPerms == 0)
		return;

	relOid = rte->relid;

	/*
	 * userid to check as: current user unless we have a setuid indication.
	 *
	 * Note: GetUserId() is presently fast enough that there's no harm in
	 * calling it separately for each RTE.	If that stops being true, we could
	 * call it once in ExecCheckRTPerms and pass the userid down from there.
	 * But for now, no need for the extra clutter.
	 */
	userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();

	/*
	 * We must have *all* the requiredPerms bits, so use aclmask not aclcheck.
	 */
	if (pg_class_aclmask(relOid, userid, requiredPerms, ACLMASK_ALL)
		!= requiredPerms)
		aclcheck_error(ACLCHECK_NO_PRIV, ACL_KIND_CLASS,
					   get_rel_name(relOid));
}

388 389 390
/*
 * Check that the query does not imply any writes to non-temp tables.
 */
391
static void
392
ExecCheckXactReadOnly(Query *parsetree)
393
{
394
	ListCell   *l;
395

396 397 398 399 400 401
	/*
	 * CREATE TABLE AS or SELECT INTO?
	 *
	 * XXX should we allow this if the destination is temp?
	 */
	if (parsetree->into != NULL)
402 403
		goto fail;

404
	/* Fail if write permissions are requested on any non-temp table */
405
	foreach(l, parsetree->rtable)
406
	{
407
		RangeTblEntry *rte = lfirst(l);
408

409
		if (rte->rtekind == RTE_SUBQUERY)
410
		{
411 412 413
			ExecCheckXactReadOnly(rte->subquery);
			continue;
		}
414

415 416
		if (rte->rtekind != RTE_RELATION)
			continue;
417

418 419
		if ((rte->requiredPerms & (~ACL_SELECT)) == 0)
			continue;
420

421 422
		if (isTempNamespace(get_rel_namespace(rte->relid)))
			continue;
423

424
		goto fail;
425 426 427 428 429
	}

	return;

fail:
430 431 432
	ereport(ERROR,
			(errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION),
			 errmsg("transaction is read-only")));
433 434 435
}


436
/* ----------------------------------------------------------------
 *		InitPlan
 *
 *		Initializes the query plan: open files, allocate storage
 *		and start up the rule manager
 *
 *		On return, queryDesc->tupDesc and queryDesc->planstate are set.
 *		Caller (ExecutorStart) has already switched into the per-query
 *		memory context.
 * ----------------------------------------------------------------
 */
static void
InitPlan(QueryDesc *queryDesc, int eflags)
{
	CmdType		operation = queryDesc->operation;
	Query	   *parseTree = queryDesc->parsetree;
	Plan	   *plan = queryDesc->plantree;
	EState	   *estate = queryDesc->estate;
	PlanState  *planstate;
	List	   *rangeTable;
	Relation	intoRelationDesc;
	bool		do_select_into;
	TupleDesc	tupType;

	/*
	 * Do permissions checks.  It's sufficient to examine the query's top
	 * rangetable here --- subplan RTEs will be checked during
	 * ExecInitSubPlan().
	 */
	ExecCheckRTPerms(parseTree->rtable);

	/*
	 * get information from query descriptor
	 */
	rangeTable = parseTree->rtable;

	/*
	 * initialize the node's execution state
	 */
	estate->es_range_table = rangeTable;

	/*
	 * if there is a result relation, initialize result relation stuff
	 */
	if (parseTree->resultRelation != 0 && operation != CMD_SELECT)
	{
		List	   *resultRelations = parseTree->resultRelations;
		int			numResultRelations;
		ResultRelInfo *resultRelInfos;

		if (resultRelations != NIL)
		{
			/*
			 * Multiple result relations (due to inheritance)
			 * parseTree->resultRelations identifies them all
			 */
			ResultRelInfo *resultRelInfo;
			ListCell   *l;

			numResultRelations = list_length(resultRelations);
			resultRelInfos = (ResultRelInfo *)
				palloc(numResultRelations * sizeof(ResultRelInfo));
			resultRelInfo = resultRelInfos;
			foreach(l, resultRelations)
			{
				initResultRelInfo(resultRelInfo,
								  lfirst_int(l),
								  rangeTable,
								  operation,
								  estate->es_instrument);
				resultRelInfo++;
			}
		}
		else
		{
			/*
			 * Single result relation identified by parseTree->resultRelation
			 */
			numResultRelations = 1;
			resultRelInfos = (ResultRelInfo *) palloc(sizeof(ResultRelInfo));
			initResultRelInfo(resultRelInfos,
							  parseTree->resultRelation,
							  rangeTable,
							  operation,
							  estate->es_instrument);
		}

		estate->es_result_relations = resultRelInfos;
		estate->es_num_result_relations = numResultRelations;
		/* Initialize to first or only result rel */
		estate->es_result_relation_info = resultRelInfos;
	}
	else
	{
		/*
		 * if no result relation, then set state appropriately
		 */
		estate->es_result_relations = NULL;
		estate->es_num_result_relations = 0;
		estate->es_result_relation_info = NULL;
	}

	/*
	 * Detect whether we're doing SELECT INTO.  If so, set the es_into_oids
	 * flag appropriately so that the plan tree will be initialized with the
	 * correct tuple descriptors.
	 */
	do_select_into = false;

	if (operation == CMD_SELECT && parseTree->into != NULL)
	{
		do_select_into = true;
		estate->es_select_into = true;
		estate->es_into_oids = parseTree->intoHasOids;
	}

	/*
	 * Have to lock relations selected FOR UPDATE/FOR SHARE
	 */
	estate->es_rowMarks = NIL;
	estate->es_forUpdate = parseTree->forUpdate;
	estate->es_rowNoWait = parseTree->rowNoWait;
	if (parseTree->rowMarks != NIL)
	{
		ListCell   *l;

		foreach(l, parseTree->rowMarks)
		{
			Index		rti = lfirst_int(l);
			Oid			relid = getrelid(rti, rangeTable);
			Relation	relation;
			ExecRowMark *erm;

			/* RowShareLock is the lock level used by FOR UPDATE/SHARE */
			relation = heap_open(relid, RowShareLock);
			erm = (ExecRowMark *) palloc(sizeof(ExecRowMark));
			erm->relation = relation;
			erm->rti = rti;
			/* junk attribute carrying the row's ctid is named per its RT index */
			snprintf(erm->resname, sizeof(erm->resname), "ctid%u", rti);
			estate->es_rowMarks = lappend(estate->es_rowMarks, erm);
		}
	}

	/*
	 * initialize the executor "tuple" table.  We need slots for all the plan
	 * nodes, plus possibly output slots for the junkfilter(s). At this point
	 * we aren't sure if we need junkfilters, so just add slots for them
	 * unconditionally.  Also, if it's not a SELECT, set up a slot for use for
	 * trigger output tuples.
	 */
	{
		int			nSlots = ExecCountSlotsNode(plan);

		if (parseTree->resultRelations != NIL)
			nSlots += list_length(parseTree->resultRelations);
		else
			nSlots += 1;
		if (operation != CMD_SELECT)
			nSlots++;

		estate->es_tupleTable = ExecCreateTupleTable(nSlots);

		if (operation != CMD_SELECT)
			estate->es_trig_tuple_slot =
				ExecAllocTableSlot(estate->es_tupleTable);
	}

	/* mark EvalPlanQual not active */
	estate->es_topPlan = plan;
	estate->es_evalPlanQual = NULL;
	estate->es_evTupleNull = NULL;
	estate->es_evTuple = NULL;
	estate->es_useEvalPlan = false;

	/*
	 * initialize the private state information for all the nodes in the query
	 * tree.  This opens files, allocates storage and leaves us ready to start
	 * processing tuples.
	 */
	planstate = ExecInitNode(plan, estate, eflags);

	/*
	 * Get the tuple descriptor describing the type of tuples to return. (this
	 * is especially important if we are creating a relation with "SELECT
	 * INTO")
	 */
	tupType = ExecGetResultType(planstate);

	/*
	 * Initialize the junk filter if needed.  SELECT and INSERT queries need a
	 * filter if there are any junk attrs in the tlist.  INSERT and SELECT
	 * INTO also need a filter if the plan may return raw disk tuples (else
	 * heap_insert will be scribbling on the source relation!). UPDATE and
	 * DELETE always need a filter, since there's always a junk 'ctid'
	 * attribute present --- no need to look first.
	 */
	{
		bool		junk_filter_needed = false;
		ListCell   *tlist;

		switch (operation)
		{
			case CMD_SELECT:
			case CMD_INSERT:
				foreach(tlist, plan->targetlist)
				{
					TargetEntry *tle = (TargetEntry *) lfirst(tlist);

					if (tle->resjunk)
					{
						junk_filter_needed = true;
						break;
					}
				}
				if (!junk_filter_needed &&
					(operation == CMD_INSERT || do_select_into) &&
					ExecMayReturnRawTuples(planstate))
					junk_filter_needed = true;
				break;
			case CMD_UPDATE:
			case CMD_DELETE:
				junk_filter_needed = true;
				break;
			default:
				break;
		}

		if (junk_filter_needed)
		{
			/*
			 * If there are multiple result relations, each one needs its own
			 * junk filter.  Note this is only possible for UPDATE/DELETE, so
			 * we can't be fooled by some needing a filter and some not.
			 */
			if (parseTree->resultRelations != NIL)
			{
				PlanState **appendplans;
				int			as_nplans;
				ResultRelInfo *resultRelInfo;
				int			i;

				/* Top plan had better be an Append here. */
				Assert(IsA(plan, Append));
				Assert(((Append *) plan)->isTarget);
				Assert(IsA(planstate, AppendState));
				appendplans = ((AppendState *) planstate)->appendplans;
				as_nplans = ((AppendState *) planstate)->as_nplans;
				Assert(as_nplans == estate->es_num_result_relations);
				resultRelInfo = estate->es_result_relations;
				for (i = 0; i < as_nplans; i++)
				{
					PlanState  *subplan = appendplans[i];
					JunkFilter *j;

					j = ExecInitJunkFilter(subplan->plan->targetlist,
							resultRelInfo->ri_RelationDesc->rd_att->tdhasoid,
								  ExecAllocTableSlot(estate->es_tupleTable));
					resultRelInfo->ri_junkFilter = j;
					resultRelInfo++;
				}

				/*
				 * Set active junkfilter too; at this point ExecInitAppend has
				 * already selected an active result relation...
				 */
				estate->es_junkFilter =
					estate->es_result_relation_info->ri_junkFilter;
			}
			else
			{
				/* Normal case with just one JunkFilter */
				JunkFilter *j;

				j = ExecInitJunkFilter(planstate->plan->targetlist,
									   tupType->tdhasoid,
								  ExecAllocTableSlot(estate->es_tupleTable));
				estate->es_junkFilter = j;
				if (estate->es_result_relation_info)
					estate->es_result_relation_info->ri_junkFilter = j;

				/* For SELECT, want to return the cleaned tuple type */
				if (operation == CMD_SELECT)
					tupType = j->jf_cleanTupType;
			}
		}
		else
			estate->es_junkFilter = NULL;
	}

	/*
	 * If doing SELECT INTO, initialize the "into" relation.  We must wait
	 * till now so we have the "clean" result tuple type to create the new
	 * table from.
	 *
	 * If EXPLAIN, skip creating the "into" relation.
	 */
	intoRelationDesc = NULL;

	if (do_select_into && !(eflags & EXEC_FLAG_EXPLAIN_ONLY))
	{
		char	   *intoName;
		Oid			namespaceId;
		Oid			tablespaceId;
		AclResult	aclresult;
		Oid			intoRelationId;
		TupleDesc	tupdesc;

		/*
		 * Check consistency of arguments
		 */
		if (parseTree->intoOnCommit != ONCOMMIT_NOOP && !parseTree->into->istemp)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
					 errmsg("ON COMMIT can only be used on temporary tables")));

		/*
		 * find namespace to create in, check permissions
		 */
		intoName = parseTree->into->relname;
		namespaceId = RangeVarGetCreationNamespace(parseTree->into);

		aclresult = pg_namespace_aclcheck(namespaceId, GetUserId(),
										  ACL_CREATE);
		if (aclresult != ACLCHECK_OK)
			aclcheck_error(aclresult, ACL_KIND_NAMESPACE,
						   get_namespace_name(namespaceId));

		/*
		 * Select tablespace to use.  If not specified, use default_tablespace
		 * (which may in turn default to database's default).
		 */
		if (parseTree->intoTableSpaceName)
		{
			tablespaceId = get_tablespace_oid(parseTree->intoTableSpaceName);
			if (!OidIsValid(tablespaceId))
				ereport(ERROR,
						(errcode(ERRCODE_UNDEFINED_OBJECT),
						 errmsg("tablespace \"%s\" does not exist",
								parseTree->intoTableSpaceName)));
		} else
		{
			tablespaceId = GetDefaultTablespace();
			/* note InvalidOid is OK in this case */
		}

		/* Check permissions except when using the database's default */
		if (OidIsValid(tablespaceId))
		{
			AclResult	aclresult;

			aclresult = pg_tablespace_aclcheck(tablespaceId, GetUserId(),
											   ACL_CREATE);

			if (aclresult != ACLCHECK_OK)
				aclcheck_error(aclresult, ACL_KIND_TABLESPACE,
							   get_tablespace_name(tablespaceId));
		}

		/*
		 * have to copy tupType to get rid of constraints
		 */
		tupdesc = CreateTupleDescCopy(tupType);

		intoRelationId = heap_create_with_catalog(intoName,
												  namespaceId,
												  tablespaceId,
												  InvalidOid,
												  GetUserId(),
												  tupdesc,
												  RELKIND_RELATION,
												  false,
												  true,
												  0,
												  parseTree->intoOnCommit,
												  allowSystemTableMods);

		FreeTupleDesc(tupdesc);

		/*
		 * Advance command counter so that the newly-created relation's
		 * catalog tuples will be visible to heap_open.
		 */
		CommandCounterIncrement();

		/*
		 * If necessary, create a TOAST table for the into relation. Note that
		 * AlterTableCreateToastTable ends with CommandCounterIncrement(), so
		 * that the TOAST table will be visible for insertion.
		 */
		AlterTableCreateToastTable(intoRelationId, true);

		/*
		 * And open the constructed table for writing.
		 */
		intoRelationDesc = heap_open(intoRelationId, AccessExclusiveLock);

		/* use_wal off requires rd_targblock be initially invalid */
		Assert(intoRelationDesc->rd_targblock == InvalidBlockNumber);

		/*
		 * We can skip WAL-logging the insertions, unless PITR is in use.
		 *
		 * Note that for a non-temp INTO table, this is safe only because we
		 * know that the catalog changes above will have been WAL-logged, and
		 * so RecordTransactionCommit will think it needs to WAL-log the
		 * eventual transaction commit.  Else the commit might be lost, even
		 * though all the data is safely fsync'd ...
		 */
		estate->es_into_relation_use_wal = XLogArchivingActive();
	}

	estate->es_into_relation_descriptor = intoRelationDesc;

	queryDesc->tupDesc = tupType;
	queryDesc->planstate = planstate;
}

848 849 850 851 852 853 854
/*
 * Initialize ResultRelInfo data for one result relation
 */
static void
initResultRelInfo(ResultRelInfo *resultRelInfo,
				  Index resultRelationIndex,
				  List *rangeTable,
855 856
				  CmdType operation,
				  bool doInstrument)
857 858 859 860 861 862 863 864 865 866
{
	Oid			resultRelationOid;
	Relation	resultRelationDesc;

	resultRelationOid = getrelid(resultRelationIndex, rangeTable);
	resultRelationDesc = heap_open(resultRelationOid, RowExclusiveLock);

	switch (resultRelationDesc->rd_rel->relkind)
	{
		case RELKIND_SEQUENCE:
867 868
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
869
					 errmsg("cannot change sequence \"%s\"",
B
Bruce Momjian 已提交
870
							RelationGetRelationName(resultRelationDesc))));
871 872
			break;
		case RELKIND_TOASTVALUE:
873 874
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
875
					 errmsg("cannot change TOAST relation \"%s\"",
B
Bruce Momjian 已提交
876
							RelationGetRelationName(resultRelationDesc))));
877 878
			break;
		case RELKIND_VIEW:
879 880
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
881
					 errmsg("cannot change view \"%s\"",
B
Bruce Momjian 已提交
882
							RelationGetRelationName(resultRelationDesc))));
883 884 885 886 887 888 889 890 891 892
			break;
	}

	MemSet(resultRelInfo, 0, sizeof(ResultRelInfo));
	resultRelInfo->type = T_ResultRelInfo;
	resultRelInfo->ri_RangeTableIndex = resultRelationIndex;
	resultRelInfo->ri_RelationDesc = resultRelationDesc;
	resultRelInfo->ri_NumIndices = 0;
	resultRelInfo->ri_IndexRelationDescs = NULL;
	resultRelInfo->ri_IndexRelationInfo = NULL;
893 894
	/* make a copy so as not to depend on relcache info not changing... */
	resultRelInfo->ri_TrigDesc = CopyTriggerDesc(resultRelationDesc->trigdesc);
895 896
	if (resultRelInfo->ri_TrigDesc)
	{
B
Bruce Momjian 已提交
897
		int			n = resultRelInfo->ri_TrigDesc->numtriggers;
898 899 900 901 902 903 904 905 906 907 908 909 910

		resultRelInfo->ri_TrigFunctions = (FmgrInfo *)
			palloc0(n * sizeof(FmgrInfo));
		if (doInstrument)
			resultRelInfo->ri_TrigInstrument = InstrAlloc(n);
		else
			resultRelInfo->ri_TrigInstrument = NULL;
	}
	else
	{
		resultRelInfo->ri_TrigFunctions = NULL;
		resultRelInfo->ri_TrigInstrument = NULL;
	}
911 912 913 914 915
	resultRelInfo->ri_ConstraintExprs = NULL;
	resultRelInfo->ri_junkFilter = NULL;

	/*
	 * If there are indices on the result relation, open them and save
B
Bruce Momjian 已提交
916 917 918
	 * descriptors in the result relation info, so that we can add new index
	 * entries for the tuples we add/update.  We need not do this for a
	 * DELETE, however, since deletion doesn't affect indexes.
919 920 921 922 923 924
	 */
	if (resultRelationDesc->rd_rel->relhasindex &&
		operation != CMD_DELETE)
		ExecOpenIndices(resultRelInfo);
}

925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981
/*
 *		ExecContextForcesOids
 *
 * This is pretty grotty: when doing INSERT, UPDATE, or SELECT INTO,
 * we need to ensure that result tuples have space for an OID iff they are
 * going to be stored into a relation that has OIDs.  In other contexts
 * we are free to choose whether to leave space for OIDs in result tuples
 * (we generally don't want to, but we do if a physical-tlist optimization
 * is possible).  This routine checks the plan context and returns TRUE if the
 * choice is forced, FALSE if the choice is not forced.  In the TRUE case,
 * *hasoids is set to the required value.
 *
 * One reason this is ugly is that all plan nodes in the plan tree will emit
 * tuples with space for an OID, though we really only need the topmost node
 * to do so.  However, node types like Sort don't project new tuples but just
 * return their inputs, and in those cases the requirement propagates down
 * to the input node.  Eventually we might make this code smart enough to
 * recognize how far down the requirement really goes, but for now we just
 * make all plan nodes do the same thing if the top level forces the choice.
 *
 * We assume that estate->es_result_relation_info is already set up to
 * describe the target relation.  Note that in an UPDATE that spans an
 * inheritance tree, some of the target relations may have OIDs and some not.
 * We have to make the decisions on a per-relation basis as we initialize
 * each of the child plans of the topmost Append plan.
 *
 * SELECT INTO is even uglier, because we don't have the INTO relation's
 * descriptor available when this code runs; we have to look aside at a
 * flag set by InitPlan().
 */
bool
ExecContextForcesOids(PlanState *planstate, bool *hasoids)
{
	if (planstate->state->es_select_into)
	{
		*hasoids = planstate->state->es_into_oids;
		return true;
	}
	else
	{
		ResultRelInfo *ri = planstate->state->es_result_relation_info;

		if (ri != NULL)
		{
			Relation	rel = ri->ri_RelationDesc;

			if (rel != NULL)
			{
				*hasoids = rel->rd_rel->relhasoids;
				return true;
			}
		}
	}

	return false;
}

982
/* ----------------------------------------------------------------
983
 *		ExecEndPlan
984
 *
985
 *		Cleans up the query plan -- closes files and frees up storage
986 987 988 989 990 991
 *
 * NOTE: we are no longer very worried about freeing storage per se
 * in this code; FreeExecutorState should be guaranteed to release all
 * memory that needs to be released.  What we are worried about doing
 * is closing relations and dropping buffer pins.  Thus, for example,
 * tuple tables must be cleared or dropped to ensure pins are released.
992 993
 * ----------------------------------------------------------------
 */
994
void
995
ExecEndPlan(PlanState *planstate, EState *estate)
996
{
997 998
	ResultRelInfo *resultRelInfo;
	int			i;
999
	ListCell   *l;
1000

1001 1002 1003 1004 1005 1006
	/*
	 * shut down any PlanQual processing we were doing
	 */
	if (estate->es_evalPlanQual != NULL)
		EndEvalPlanQual(estate);

B
Bruce Momjian 已提交
1007
	/*
1008
	 * shut down the node-type-specific query processing
1009
	 */
1010
	ExecEndNode(planstate);
1011

B
Bruce Momjian 已提交
1012
	/*
B
Bruce Momjian 已提交
1013
	 * destroy the executor "tuple" table.
1014
	 */
1015 1016
	ExecDropTupleTable(estate->es_tupleTable, true);
	estate->es_tupleTable = NULL;
1017

B
Bruce Momjian 已提交
1018
	/*
B
Bruce Momjian 已提交
1019
	 * close the result relation(s) if any, but hold locks until xact commit.
1020
	 */
1021 1022
	resultRelInfo = estate->es_result_relations;
	for (i = estate->es_num_result_relations; i > 0; i--)
1023
	{
1024 1025 1026 1027
		/* Close indices and then the relation itself */
		ExecCloseIndices(resultRelInfo);
		heap_close(resultRelInfo->ri_RelationDesc, NoLock);
		resultRelInfo++;
1028 1029
	}

B
Bruce Momjian 已提交
1030
	/*
1031
	 * close the "into" relation if necessary, again keeping lock
1032
	 */
1033
	if (estate->es_into_relation_descriptor != NULL)
1034 1035
	{
		/*
B
Bruce Momjian 已提交
1036 1037 1038 1039
		 * If we skipped using WAL, and it's not a temp relation, we must
		 * force the relation down to disk before it's safe to commit the
		 * transaction.  This requires forcing out any dirty buffers and then
		 * doing a forced fsync.
1040 1041 1042 1043 1044
		 */
		if (!estate->es_into_relation_use_wal &&
			!estate->es_into_relation_descriptor->rd_istemp)
		{
			FlushRelationBuffers(estate->es_into_relation_descriptor);
1045
			/* FlushRelationBuffers will have opened rd_smgr */
1046 1047 1048
			smgrimmedsync(estate->es_into_relation_descriptor->rd_smgr);
		}

1049
		heap_close(estate->es_into_relation_descriptor, NoLock);
1050
	}
1051 1052

	/*
1053
	 * close any relations selected FOR UPDATE/FOR SHARE, again keeping locks
1054
	 */
1055
	foreach(l, estate->es_rowMarks)
1056
	{
1057
		ExecRowMark *erm = lfirst(l);
1058 1059 1060

		heap_close(erm->relation, NoLock);
	}
1061 1062 1063
}

/* ----------------------------------------------------------------
1064 1065
 *		ExecutePlan
 *
1066
 *		processes the query plan to retrieve 'numberTuples' tuples in the
1067
 *		direction specified.
1068
 *
1069
 *		Retrieves all tuples if numberTuples is 0
1070
 *
1071
 *		result is either a slot containing the last tuple in the case
1072
 *		of a SELECT or NULL otherwise.
1073
 *
1074 1075
 * Note: the ctid attribute is a 'junk' attribute that is removed before the
 * user can see it
1076 1077 1078
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
1079
ExecutePlan(EState *estate,
1080
			PlanState *planstate,
1081
			CmdType operation,
1082
			long numberTuples,
1083
			ScanDirection direction,
1084
			DestReceiver *dest)
1085
{
B
Bruce Momjian 已提交
1086 1087 1088 1089 1090 1091
	JunkFilter *junkfilter;
	TupleTableSlot *slot;
	ItemPointer tupleid = NULL;
	ItemPointerData tuple_ctid;
	long		current_tuple_count;
	TupleTableSlot *result;
1092

B
Bruce Momjian 已提交
1093
	/*
B
Bruce Momjian 已提交
1094
	 * initialize local variables
1095
	 */
1096 1097 1098 1099
	slot = NULL;
	current_tuple_count = 0;
	result = NULL;

B
Bruce Momjian 已提交
1100 1101
	/*
	 * Set the direction.
1102
	 */
1103 1104
	estate->es_direction = direction;

1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120
	/*
	 * Process BEFORE EACH STATEMENT triggers
	 */
	switch (operation)
	{
		case CMD_UPDATE:
			ExecBSUpdateTriggers(estate, estate->es_result_relation_info);
			break;
		case CMD_DELETE:
			ExecBSDeleteTriggers(estate, estate->es_result_relation_info);
			break;
		case CMD_INSERT:
			ExecBSInsertTriggers(estate, estate->es_result_relation_info);
			break;
		default:
			/* do nothing */
1121
			break;
1122 1123
	}

B
Bruce Momjian 已提交
1124
	/*
B
Bruce Momjian 已提交
1125
	 * Loop until we've processed the proper number of tuples from the plan.
1126 1127 1128 1129
	 */

	for (;;)
	{
1130 1131
		/* Reset the per-output-tuple exprcontext */
		ResetPerTupleExprContext(estate);
B
Bruce Momjian 已提交
1132

B
Bruce Momjian 已提交
1133
		/*
B
Bruce Momjian 已提交
1134
		 * Execute the plan and obtain a tuple
1135
		 */
B
Bruce Momjian 已提交
1136
lnext:	;
1137 1138 1139 1140
		if (estate->es_useEvalPlan)
		{
			slot = EvalPlanQualNext(estate);
			if (TupIsNull(slot))
1141
				slot = ExecProcNode(planstate);
1142 1143
		}
		else
1144
			slot = ExecProcNode(planstate);
1145

B
Bruce Momjian 已提交
1146
		/*
B
Bruce Momjian 已提交
1147 1148
		 * if the tuple is null, then we assume there is nothing more to
		 * process so we just return null...
1149 1150 1151 1152 1153
		 */
		if (TupIsNull(slot))
		{
			result = NULL;
			break;
1154 1155
		}

B
Bruce Momjian 已提交
1156
		/*
B
Bruce Momjian 已提交
1157 1158
		 * if we have a junk filter, then project a new tuple with the junk
		 * removed.
1159
		 *
1160
		 * Store this new "clean" tuple in the junkfilter's resultSlot.
B
Bruce Momjian 已提交
1161 1162
		 * (Formerly, we stored it back over the "dirty" tuple, which is WRONG
		 * because that tuple slot has the wrong descriptor.)
1163
		 *
B
Bruce Momjian 已提交
1164
		 * Also, extract all the junk information we need.
1165
		 */
1166
		if ((junkfilter = estate->es_junkFilter) != NULL)
1167
		{
1168 1169
			Datum		datum;
			bool		isNull;
1170

B
Bruce Momjian 已提交
1171
			/*
1172 1173 1174 1175 1176 1177 1178 1179 1180
			 * extract the 'ctid' junk attribute.
			 */
			if (operation == CMD_UPDATE || operation == CMD_DELETE)
			{
				if (!ExecGetJunkAttribute(junkfilter,
										  slot,
										  "ctid",
										  &datum,
										  &isNull))
1181
					elog(ERROR, "could not find junk ctid column");
1182

1183
				/* shouldn't ever get a null result... */
1184
				if (isNull)
1185
					elog(ERROR, "ctid is NULL");
1186 1187

				tupleid = (ItemPointer) DatumGetPointer(datum);
B
Bruce Momjian 已提交
1188
				tuple_ctid = *tupleid;	/* make sure we don't free the ctid!! */
1189 1190
				tupleid = &tuple_ctid;
			}
B
Bruce Momjian 已提交
1191

1192 1193 1194
			/*
			 * Process any FOR UPDATE or FOR SHARE locking requested.
			 */
1195
			else if (estate->es_rowMarks != NIL)
1196
			{
1197
				ListCell   *l;
1198

B
Bruce Momjian 已提交
1199
		lmark:	;
1200
				foreach(l, estate->es_rowMarks)
1201
				{
1202
					ExecRowMark *erm = lfirst(l);
1203
					HeapTupleData tuple;
1204 1205 1206
					Buffer		buffer;
					ItemPointerData update_ctid;
					TransactionId update_xmax;
1207
					TupleTableSlot *newSlot;
B
Bruce Momjian 已提交
1208 1209
					LockTupleMode lockmode;
					HTSU_Result test;
1210

1211 1212 1213 1214 1215
					if (!ExecGetJunkAttribute(junkfilter,
											  slot,
											  erm->resname,
											  &datum,
											  &isNull))
1216
						elog(ERROR, "could not find junk \"%s\" column",
1217
							 erm->resname);
1218

1219
					/* shouldn't ever get a null result... */
1220
					if (isNull)
1221
						elog(ERROR, "\"%s\" is NULL", erm->resname);
1222

1223 1224
					tuple.t_self = *((ItemPointer) DatumGetPointer(datum));

1225 1226 1227 1228 1229 1230
					if (estate->es_forUpdate)
						lockmode = LockTupleExclusive;
					else
						lockmode = LockTupleShared;

					test = heap_lock_tuple(erm->relation, &tuple, &buffer,
1231 1232 1233
										   &update_ctid, &update_xmax,
										   estate->es_snapshot->curcid,
										   lockmode, estate->es_rowNoWait);
1234 1235 1236 1237
					ReleaseBuffer(buffer);
					switch (test)
					{
						case HeapTupleSelfUpdated:
1238 1239 1240
							/* treat it as deleted; do not process */
							goto lnext;

1241 1242 1243 1244
						case HeapTupleMayBeUpdated:
							break;

						case HeapTupleUpdated:
1245
							if (IsXactIsoLevelSerializable)
1246
								ereport(ERROR,
B
Bruce Momjian 已提交
1247 1248
								 (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
								  errmsg("could not serialize access due to concurrent update")));
1249 1250
							if (!ItemPointerEquals(&update_ctid,
												   &tuple.t_self))
1251
							{
1252 1253 1254 1255
								/* updated, so look at updated version */
								newSlot = EvalPlanQual(estate,
													   erm->rti,
													   &update_ctid,
1256 1257
													   update_xmax,
													   estate->es_snapshot->curcid);
1258
								if (!TupIsNull(newSlot))
1259 1260 1261 1262 1263 1264
								{
									slot = newSlot;
									estate->es_useEvalPlan = true;
									goto lmark;
								}
							}
B
Bruce Momjian 已提交
1265 1266 1267

							/*
							 * if tuple was deleted or PlanQual failed for
B
Bruce Momjian 已提交
1268
							 * updated tuple - we must not return this tuple!
1269 1270
							 */
							goto lnext;
1271 1272

						default:
1273
							elog(ERROR, "unrecognized heap_lock_tuple status: %u",
1274
								 test);
1275
							return NULL;
1276 1277 1278
					}
				}
			}
1279

B
Bruce Momjian 已提交
1280
			/*
1281 1282 1283
			 * Finally create a new "clean" tuple with all junk attributes
			 * removed
			 */
1284
			slot = ExecFilterJunk(junkfilter, slot);
1285
		}
1286

B
Bruce Momjian 已提交
1287
		/*
B
Bruce Momjian 已提交
1288 1289 1290
		 * now that we have a tuple, do the appropriate thing with it.. either
		 * return it to the user, add it to a relation someplace, delete it
		 * from a relation, or modify some of its attributes.
1291 1292 1293
		 */
		switch (operation)
		{
1294
			case CMD_SELECT:
B
Bruce Momjian 已提交
1295
				ExecSelect(slot,	/* slot containing tuple */
1296
						   dest,	/* destination's tuple-receiver obj */
1297
						   estate);
1298 1299
				result = slot;
				break;
1300

1301
			case CMD_INSERT:
1302
				ExecInsert(slot, tupleid, estate);
1303 1304
				result = NULL;
				break;
1305

1306 1307 1308 1309
			case CMD_DELETE:
				ExecDelete(slot, tupleid, estate);
				result = NULL;
				break;
1310

1311
			case CMD_UPDATE:
1312
				ExecUpdate(slot, tupleid, estate);
1313 1314
				result = NULL;
				break;
1315

1316
			default:
1317 1318
				elog(ERROR, "unrecognized operation code: %d",
					 (int) operation);
1319
				result = NULL;
1320
				break;
1321
		}
B
Bruce Momjian 已提交
1322

B
Bruce Momjian 已提交
1323
		/*
B
Bruce Momjian 已提交
1324 1325 1326
		 * check our tuple count.. if we've processed the proper number then
		 * quit, else loop again and process more tuples.  Zero numberTuples
		 * means no limit.
1327
		 */
1328
		current_tuple_count++;
1329
		if (numberTuples && numberTuples == current_tuple_count)
1330
			break;
1331
	}
1332

1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348
	/*
	 * Process AFTER EACH STATEMENT triggers
	 */
	switch (operation)
	{
		case CMD_UPDATE:
			ExecASUpdateTriggers(estate, estate->es_result_relation_info);
			break;
		case CMD_DELETE:
			ExecASDeleteTriggers(estate, estate->es_result_relation_info);
			break;
		case CMD_INSERT:
			ExecASInsertTriggers(estate, estate->es_result_relation_info);
			break;
		default:
			/* do nothing */
1349
			break;
1350 1351
	}

B
Bruce Momjian 已提交
1352
	/*
B
Bruce Momjian 已提交
1353
	 * here, result is either a slot containing a tuple in the case of a
1354
	 * SELECT or NULL otherwise.
1355
	 */
1356
	return result;
1357 1358 1359
}

/* ----------------------------------------------------------------
1360
 *		ExecSelect
1361
 *
1362
 *		SELECTs are easy.. we just pass the tuple to the appropriate
1363
 *		print function.  The only complexity is when we do a
1364
 *		"SELECT INTO", in which case we insert the tuple into
1365 1366
 *		the appropriate relation (note: this is a newly created relation
 *		so we don't need to worry about indices or locks.)
1367 1368 1369
 * ----------------------------------------------------------------
 */
static void
1370
ExecSelect(TupleTableSlot *slot,
1371
		   DestReceiver *dest,
1372
		   EState *estate)
1373
{
B
Bruce Momjian 已提交
1374
	/*
B
Bruce Momjian 已提交
1375
	 * insert the tuple into the "into relation"
1376 1377
	 *
	 * XXX this probably ought to be replaced by a separate destination
1378 1379 1380
	 */
	if (estate->es_into_relation_descriptor != NULL)
	{
1381 1382 1383
		HeapTuple	tuple;

		tuple = ExecCopySlotTuple(slot);
1384
		heap_insert(estate->es_into_relation_descriptor, tuple,
1385 1386 1387
					estate->es_snapshot->curcid,
					estate->es_into_relation_use_wal,
					false);		/* never any point in using FSM */
1388 1389
		/* we know there are no indexes to update */
		heap_freetuple(tuple);
1390 1391 1392
		IncrAppended();
	}

B
Bruce Momjian 已提交
1393
	/*
1394
	 * send the tuple to the destination
1395
	 */
1396
	(*dest->receiveSlot) (slot, dest);
1397 1398
	IncrRetrieved();
	(estate->es_processed)++;
1399 1400 1401
}

/* ----------------------------------------------------------------
1402
 *		ExecInsert
1403
 *
1404
 *		INSERTs are trickier.. we have to insert the tuple into
1405 1406
 *		the base relation and insert appropriate tuples into the
 *		index relations.
1407 1408 1409
 * ----------------------------------------------------------------
 */
static void
1410
ExecInsert(TupleTableSlot *slot,
1411
		   ItemPointer tupleid,
1412
		   EState *estate)
1413
{
1414
	HeapTuple	tuple;
1415
	ResultRelInfo *resultRelInfo;
1416 1417
	Relation	resultRelationDesc;
	Oid			newId;
1418

B
Bruce Momjian 已提交
1419
	/*
B
Bruce Momjian 已提交
1420 1421
	 * get the heap tuple out of the tuple table slot, making sure we have a
	 * writable copy
1422
	 */
1423
	tuple = ExecMaterializeSlot(slot);
1424

B
Bruce Momjian 已提交
1425
	/*
1426
	 * get information on the (current) result relation
1427
	 */
1428 1429
	resultRelInfo = estate->es_result_relation_info;
	resultRelationDesc = resultRelInfo->ri_RelationDesc;
1430 1431

	/* BEFORE ROW INSERT Triggers */
1432
	if (resultRelInfo->ri_TrigDesc &&
B
Bruce Momjian 已提交
1433
		resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_INSERT] > 0)
1434
	{
1435
		HeapTuple	newtuple;
1436

1437
		newtuple = ExecBRInsertTriggers(estate, resultRelInfo, tuple);
1438 1439 1440 1441 1442 1443

		if (newtuple == NULL)	/* "do nothing" */
			return;

		if (newtuple != tuple)	/* modified by Trigger(s) */
		{
1444
			/*
1445 1446
			 * Put the modified tuple into a slot for convenience of routines
			 * below.  We assume the tuple was allocated in per-tuple memory
B
Bruce Momjian 已提交
1447 1448
			 * context, and therefore will go away by itself. The tuple table
			 * slot should not try to clear it.
1449
			 */
1450 1451 1452 1453 1454 1455 1456 1457
			TupleTableSlot *newslot = estate->es_trig_tuple_slot;

			if (newslot->tts_tupleDescriptor != slot->tts_tupleDescriptor)
				ExecSetSlotDescriptor(newslot,
									  slot->tts_tupleDescriptor,
									  false);
			ExecStoreTuple(newtuple, newslot, InvalidBuffer, false);
			slot = newslot;
1458
			tuple = newtuple;
1459 1460 1461
		}
	}

B
Bruce Momjian 已提交
1462
	/*
1463
	 * Check the constraints of the tuple
1464 1465
	 */
	if (resultRelationDesc->rd_att->constr)
1466
		ExecConstraints(resultRelInfo, slot, estate);
1467

B
Bruce Momjian 已提交
1468
	/*
B
Bruce Momjian 已提交
1469
	 * insert the tuple
1470
	 *
B
Bruce Momjian 已提交
1471 1472
	 * Note: heap_insert returns the tid (location) of the new tuple in the
	 * t_self field.
1473
	 */
1474
	newId = heap_insert(resultRelationDesc, tuple,
1475 1476
						estate->es_snapshot->curcid,
						true, true);
1477

1478
	IncrAppended();
1479 1480
	(estate->es_processed)++;
	estate->es_lastoid = newId;
T
Tom Lane 已提交
1481
	setLastTid(&(tuple->t_self));
1482

B
Bruce Momjian 已提交
1483
	/*
1484
	 * insert index entries for tuple
1485
	 */
1486
	if (resultRelInfo->ri_NumIndices > 0)
1487
		ExecInsertIndexTuples(slot, &(tuple->t_self), estate, false);
1488 1489

	/* AFTER ROW INSERT Triggers */
1490
	ExecARInsertTriggers(estate, resultRelInfo, tuple);
1491 1492 1493
}

/* ----------------------------------------------------------------
1494
 *		ExecDelete
1495
 *
1496 1497
 *		DELETE is like UPDATE, except that we delete the tuple and no
 *		index modifications are needed
1498 1499 1500
 * ----------------------------------------------------------------
 */
static void
1501
ExecDelete(TupleTableSlot *slot,
1502
		   ItemPointer tupleid,
1503
		   EState *estate)
1504
{
1505
	ResultRelInfo *resultRelInfo;
B
Bruce Momjian 已提交
1506
	Relation	resultRelationDesc;
B
Bruce Momjian 已提交
1507
	HTSU_Result result;
1508 1509
	ItemPointerData update_ctid;
	TransactionId update_xmax;
1510

B
Bruce Momjian 已提交
1511
	/*
1512
	 * get information on the (current) result relation
1513
	 */
1514 1515
	resultRelInfo = estate->es_result_relation_info;
	resultRelationDesc = resultRelInfo->ri_RelationDesc;
1516 1517

	/* BEFORE ROW DELETE Triggers */
1518
	if (resultRelInfo->ri_TrigDesc &&
B
Bruce Momjian 已提交
1519
		resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_DELETE] > 0)
1520
	{
1521
		bool		dodelete;
1522

1523
		dodelete = ExecBRDeleteTriggers(estate, resultRelInfo, tupleid,
1524
										estate->es_snapshot->curcid);
1525 1526 1527 1528 1529

		if (!dodelete)			/* "do nothing" */
			return;
	}

V
Vadim B. Mikheev 已提交
1530
	/*
B
Bruce Momjian 已提交
1531
	 * delete the tuple
1532
	 *
1533 1534
	 * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check that
	 * the row to be deleted is visible to that snapshot, and throw a can't-
B
Bruce Momjian 已提交
1535
	 * serialize error if not.	This is a special-case behavior needed for
1536
	 * referential integrity updates in serializable transactions.
1537
	 */
1538
ldelete:;
1539
	result = heap_delete(resultRelationDesc, tupleid,
1540
						 &update_ctid, &update_xmax,
1541 1542
						 estate->es_snapshot->curcid,
						 estate->es_crosscheck_snapshot,
B
Bruce Momjian 已提交
1543
						 true /* wait for commit */ );
V
Vadim B. Mikheev 已提交
1544 1545 1546
	switch (result)
	{
		case HeapTupleSelfUpdated:
1547
			/* already deleted by self; nothing to do */
V
Vadim B. Mikheev 已提交
1548 1549 1550 1551 1552 1553
			return;

		case HeapTupleMayBeUpdated:
			break;

		case HeapTupleUpdated:
1554
			if (IsXactIsoLevelSerializable)
1555 1556
				ereport(ERROR,
						(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
1557
						 errmsg("could not serialize access due to concurrent update")));
1558
			else if (!ItemPointerEquals(tupleid, &update_ctid))
1559
			{
1560
				TupleTableSlot *epqslot;
1561

1562 1563 1564
				epqslot = EvalPlanQual(estate,
									   resultRelInfo->ri_RangeTableIndex,
									   &update_ctid,
1565 1566
									   update_xmax,
									   estate->es_snapshot->curcid);
V
Vadim B. Mikheev 已提交
1567
				if (!TupIsNull(epqslot))
1568
				{
1569
					*tupleid = update_ctid;
1570 1571 1572
					goto ldelete;
				}
			}
1573
			/* tuple already deleted; nothing to do */
V
Vadim B. Mikheev 已提交
1574 1575 1576
			return;

		default:
1577
			elog(ERROR, "unrecognized heap_delete status: %u", result);
V
Vadim B. Mikheev 已提交
1578 1579
			return;
	}
1580 1581 1582 1583

	IncrDeleted();
	(estate->es_processed)++;

B
Bruce Momjian 已提交
1584
	/*
B
Bruce Momjian 已提交
1585
	 * Note: Normally one would think that we have to delete index tuples
1586
	 * associated with the heap tuple now...
1587
	 *
1588 1589 1590
	 * ... but in POSTGRES, we have no need to do this because VACUUM will
	 * take care of it later.  We can't delete index tuples immediately
	 * anyway, since the tuple is still visible to other transactions.
1591 1592 1593
	 */

	/* AFTER ROW DELETE Triggers */
1594
	ExecARDeleteTriggers(estate, resultRelInfo, tupleid);
1595 1596 1597
}

/* ----------------------------------------------------------------
1598
 *		ExecUpdate
1599
 *
1600 1601 1602 1603
 *		note: we can't run UPDATE queries with transactions
 *		off because UPDATEs are actually INSERTs and our
 *		scan will mistakenly loop forever, updating the tuple
 *		it just inserted..	This should be fixed but until it
1604 1605
 *		is, we don't want to get stuck in an infinite loop
 *		which corrupts your database..
1606 1607 1608
 * ----------------------------------------------------------------
 */
static void
1609
ExecUpdate(TupleTableSlot *slot,
B
Bruce Momjian 已提交
1610 1611
		   ItemPointer tupleid,
		   EState *estate)
1612
{
B
Bruce Momjian 已提交
1613
	HeapTuple	tuple;
1614
	ResultRelInfo *resultRelInfo;
B
Bruce Momjian 已提交
1615
	Relation	resultRelationDesc;
B
Bruce Momjian 已提交
1616
	HTSU_Result result;
1617 1618
	ItemPointerData update_ctid;
	TransactionId update_xmax;
1619

B
Bruce Momjian 已提交
1620
	/*
B
Bruce Momjian 已提交
1621
	 * abort the operation if not running transactions
1622 1623
	 */
	if (IsBootstrapProcessingMode())
1624
		elog(ERROR, "cannot UPDATE during bootstrap");
1625

B
Bruce Momjian 已提交
1626
	/*
B
Bruce Momjian 已提交
1627 1628
	 * get the heap tuple out of the tuple table slot, making sure we have a
	 * writable copy
1629
	 */
1630
	tuple = ExecMaterializeSlot(slot);
1631

B
Bruce Momjian 已提交
1632
	/*
1633
	 * get information on the (current) result relation
1634
	 */
1635 1636
	resultRelInfo = estate->es_result_relation_info;
	resultRelationDesc = resultRelInfo->ri_RelationDesc;
1637 1638

	/* BEFORE ROW UPDATE Triggers */
1639
	if (resultRelInfo->ri_TrigDesc &&
B
Bruce Momjian 已提交
1640
		resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_UPDATE] > 0)
1641
	{
1642
		HeapTuple	newtuple;
1643

1644
		newtuple = ExecBRUpdateTriggers(estate, resultRelInfo,
1645
										tupleid, tuple,
1646
										estate->es_snapshot->curcid);
1647 1648 1649 1650 1651 1652

		if (newtuple == NULL)	/* "do nothing" */
			return;

		if (newtuple != tuple)	/* modified by Trigger(s) */
		{
1653
			/*
1654 1655
			 * Put the modified tuple into a slot for convenience of routines
			 * below.  We assume the tuple was allocated in per-tuple memory
B
Bruce Momjian 已提交
1656 1657
			 * context, and therefore will go away by itself. The tuple table
			 * slot should not try to clear it.
1658
			 */
1659 1660 1661 1662 1663 1664 1665 1666
			TupleTableSlot *newslot = estate->es_trig_tuple_slot;

			if (newslot->tts_tupleDescriptor != slot->tts_tupleDescriptor)
				ExecSetSlotDescriptor(newslot,
									  slot->tts_tupleDescriptor,
									  false);
			ExecStoreTuple(newtuple, newslot, InvalidBuffer, false);
			slot = newslot;
1667
			tuple = newtuple;
1668 1669 1670
		}
	}

B
Bruce Momjian 已提交
1671
	/*
1672
	 * Check the constraints of the tuple
1673
	 *
1674 1675
	 * If we generate a new candidate tuple after EvalPlanQual testing, we
	 * must loop back here and recheck constraints.  (We don't need to redo
B
Bruce Momjian 已提交
1676 1677 1678
	 * triggers, however.  If there are any BEFORE triggers then trigger.c
	 * will have done heap_lock_tuple to lock the correct tuple, so there's no
	 * need to do them again.)
1679
	 */
1680
lreplace:;
1681
	if (resultRelationDesc->rd_att->constr)
1682
		ExecConstraints(resultRelInfo, slot, estate);
1683

V
Vadim B. Mikheev 已提交
1684
	/*
B
Bruce Momjian 已提交
1685
	 * replace the heap tuple
1686
	 *
1687 1688
	 * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check that
	 * the row to be updated is visible to that snapshot, and throw a can't-
B
Bruce Momjian 已提交
1689
	 * serialize error if not.	This is a special-case behavior needed for
1690
	 * referential integrity updates in serializable transactions.
1691
	 */
1692
	result = heap_update(resultRelationDesc, tupleid, tuple,
1693
						 &update_ctid, &update_xmax,
1694 1695
						 estate->es_snapshot->curcid,
						 estate->es_crosscheck_snapshot,
B
Bruce Momjian 已提交
1696
						 true /* wait for commit */ );
V
Vadim B. Mikheev 已提交
1697 1698 1699
	switch (result)
	{
		case HeapTupleSelfUpdated:
1700
			/* already deleted by self; nothing to do */
V
Vadim B. Mikheev 已提交
1701 1702 1703 1704 1705 1706
			return;

		case HeapTupleMayBeUpdated:
			break;

		case HeapTupleUpdated:
1707
			if (IsXactIsoLevelSerializable)
1708 1709
				ereport(ERROR,
						(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
1710
						 errmsg("could not serialize access due to concurrent update")));
1711
			else if (!ItemPointerEquals(tupleid, &update_ctid))
1712
			{
1713
				TupleTableSlot *epqslot;
1714

1715 1716 1717
				epqslot = EvalPlanQual(estate,
									   resultRelInfo->ri_RangeTableIndex,
									   &update_ctid,
1718 1719
									   update_xmax,
									   estate->es_snapshot->curcid);
V
Vadim B. Mikheev 已提交
1720
				if (!TupIsNull(epqslot))
1721
				{
1722
					*tupleid = update_ctid;
1723 1724
					slot = ExecFilterJunk(estate->es_junkFilter, epqslot);
					tuple = ExecMaterializeSlot(slot);
1725 1726 1727
					goto lreplace;
				}
			}
1728
			/* tuple already deleted; nothing to do */
V
Vadim B. Mikheev 已提交
1729 1730 1731
			return;

		default:
1732
			elog(ERROR, "unrecognized heap_update status: %u", result);
V
Vadim B. Mikheev 已提交
1733
			return;
1734 1735 1736 1737 1738
	}

	IncrReplaced();
	(estate->es_processed)++;

B
Bruce Momjian 已提交
1739
	/*
B
Bruce Momjian 已提交
1740 1741 1742
	 * Note: instead of having to update the old index tuples associated with
	 * the heap tuple, all we do is form and insert new index tuples. This is
	 * because UPDATEs are actually DELETEs and INSERTs, and index tuple
1743
	 * deletion is done later by VACUUM (see notes in ExecDelete).	All we do
1744
	 * here is insert new index tuples.  -cim 9/27/89
1745 1746
	 */

B
Bruce Momjian 已提交
1747
	/*
1748
	 * insert index entries for tuple
1749
	 *
B
Bruce Momjian 已提交
1750 1751
	 * Note: heap_update returns the tid (location) of the new tuple in the
	 * t_self field.
1752
	 */
1753
	if (resultRelInfo->ri_NumIndices > 0)
1754
		ExecInsertIndexTuples(slot, &(tuple->t_self), estate, false);
1755 1756

	/* AFTER ROW UPDATE Triggers */
1757
	ExecARUpdateTriggers(estate, resultRelInfo, tupleid, tuple);
1758
}
V
Vadim B. Mikheev 已提交
1759

1760
static const char *
1761 1762
ExecRelCheck(ResultRelInfo *resultRelInfo,
			 TupleTableSlot *slot, EState *estate)
V
Vadim B. Mikheev 已提交
1763
{
1764
	Relation	rel = resultRelInfo->ri_RelationDesc;
1765 1766
	int			ncheck = rel->rd_att->constr->num_check;
	ConstrCheck *check = rel->rd_att->constr->check;
1767
	ExprContext *econtext;
1768
	MemoryContext oldContext;
1769 1770
	List	   *qual;
	int			i;
1771

1772 1773
	/*
	 * If first time through for this result relation, build expression
B
Bruce Momjian 已提交
1774 1775
	 * nodetrees for rel's constraint expressions.  Keep them in the per-query
	 * memory context so they'll survive throughout the query.
1776 1777 1778 1779 1780 1781 1782 1783
	 */
	if (resultRelInfo->ri_ConstraintExprs == NULL)
	{
		oldContext = MemoryContextSwitchTo(estate->es_query_cxt);
		resultRelInfo->ri_ConstraintExprs =
			(List **) palloc(ncheck * sizeof(List *));
		for (i = 0; i < ncheck; i++)
		{
1784 1785
			/* ExecQual wants implicit-AND form */
			qual = make_ands_implicit(stringToNode(check[i].ccbin));
1786
			resultRelInfo->ri_ConstraintExprs[i] = (List *)
1787
				ExecPrepareExpr((Expr *) qual, estate);
1788 1789 1790 1791
		}
		MemoryContextSwitchTo(oldContext);
	}

1792
	/*
B
Bruce Momjian 已提交
1793 1794
	 * We will use the EState's per-tuple context for evaluating constraint
	 * expressions (creating it if it's not already there).
1795
	 */
1796
	econtext = GetPerTupleExprContext(estate);
1797

1798 1799 1800 1801
	/* Arrange for econtext's scan tuple to be the tuple under test */
	econtext->ecxt_scantuple = slot;

	/* And evaluate the constraints */
1802 1803
	for (i = 0; i < ncheck; i++)
	{
1804
		qual = resultRelInfo->ri_ConstraintExprs[i];
1805

1806 1807
		/*
		 * NOTE: SQL92 specifies that a NULL result from a constraint
1808 1809
		 * expression is not to be treated as a failure.  Therefore, tell
		 * ExecQual to return TRUE for NULL.
1810
		 */
1811
		if (!ExecQual(qual, econtext, true))
1812
			return check[i].ccname;
1813 1814
	}

1815
	/* NULL result means no error */
1816
	return NULL;
V
Vadim B. Mikheev 已提交
1817 1818
}

1819
void
1820
ExecConstraints(ResultRelInfo *resultRelInfo,
1821
				TupleTableSlot *slot, EState *estate)
V
Vadim B. Mikheev 已提交
1822
{
1823
	Relation	rel = resultRelInfo->ri_RelationDesc;
1824 1825 1826
	TupleConstr *constr = rel->rd_att->constr;

	Assert(constr);
1827

1828
	if (constr->has_not_null)
V
Vadim B. Mikheev 已提交
1829
	{
1830
		int			natts = rel->rd_att->natts;
1831
		int			attrChk;
1832

1833
		for (attrChk = 1; attrChk <= natts; attrChk++)
1834
		{
B
Bruce Momjian 已提交
1835
			if (rel->rd_att->attrs[attrChk - 1]->attnotnull &&
1836
				slot_attisnull(slot, attrChk))
1837 1838
				ereport(ERROR,
						(errcode(ERRCODE_NOT_NULL_VIOLATION),
1839
						 errmsg("null value in column \"%s\" violates not-null constraint",
B
Bruce Momjian 已提交
1840
						NameStr(rel->rd_att->attrs[attrChk - 1]->attname))));
1841 1842 1843
		}
	}

1844
	if (constr->num_check > 0)
1845
	{
B
Bruce Momjian 已提交
1846
		const char *failed;
1847

1848
		if ((failed = ExecRelCheck(resultRelInfo, slot, estate)) != NULL)
1849 1850
			ereport(ERROR,
					(errcode(ERRCODE_CHECK_VIOLATION),
1851
					 errmsg("new row for relation \"%s\" violates check constraint \"%s\"",
1852
							RelationGetRelationName(rel), failed)));
1853
	}
V
Vadim B. Mikheev 已提交
1854
}
1855

1856 1857 1858 1859 1860
/*
 * Check a modified tuple to see if we want to process its updated version
 * under READ COMMITTED rules.
 *
 * See backend/executor/README for some info about how this works.
1861 1862 1863 1864 1865
 *
 *	estate - executor state data
 *	rti - rangetable index of table containing tuple
 *	*tid - t_ctid from the outdated tuple (ie, next updated version)
 *	priorXmax - t_xmax from the outdated tuple
1866
 *	curCid - command ID of current command of my transaction
1867 1868 1869 1870 1871 1872
 *
 * *tid is also an output parameter: it's modified to hold the TID of the
 * latest version of the tuple (note this may be changed even on failure)
 *
 * Returns a slot containing the new candidate update/delete tuple, or
 * NULL if we determine we shouldn't process the row.
1873
 */
B
Bruce Momjian 已提交
1874
TupleTableSlot *
1875
EvalPlanQual(EState *estate, Index rti,
1876
			 ItemPointer tid, TransactionId priorXmax, CommandId curCid)
1877
{
1878 1879
	evalPlanQual *epq;
	EState	   *epqstate;
B
Bruce Momjian 已提交
1880 1881
	Relation	relation;
	HeapTupleData tuple;
1882 1883
	HeapTuple	copyTuple = NULL;
	bool		endNode;
1884 1885 1886

	Assert(rti != 0);

1887 1888 1889 1890 1891 1892 1893 1894
	/*
	 * find relation containing target tuple
	 */
	if (estate->es_result_relation_info != NULL &&
		estate->es_result_relation_info->ri_RangeTableIndex == rti)
		relation = estate->es_result_relation_info->ri_RelationDesc;
	else
	{
1895
		ListCell   *l;
1896 1897

		relation = NULL;
1898
		foreach(l, estate->es_rowMarks)
1899
		{
1900
			if (((ExecRowMark *) lfirst(l))->rti == rti)
1901
			{
1902
				relation = ((ExecRowMark *) lfirst(l))->relation;
1903 1904 1905 1906
				break;
			}
		}
		if (relation == NULL)
1907
			elog(ERROR, "could not find RowMark for RT index %u", rti);
1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919
	}

	/*
	 * fetch tid tuple
	 *
	 * Loop here to deal with updated or busy tuples
	 */
	tuple.t_self = *tid;
	for (;;)
	{
		Buffer		buffer;

1920
		if (heap_fetch(relation, SnapshotDirty, &tuple, &buffer, true, NULL))
1921
		{
1922 1923
			/*
			 * If xmin isn't what we're expecting, the slot must have been
B
Bruce Momjian 已提交
1924 1925 1926
			 * recycled and reused for an unrelated tuple.	This implies that
			 * the latest version of the row was deleted, so we need do
			 * nothing.  (Should be safe to examine xmin without getting
1927 1928 1929 1930 1931 1932 1933 1934 1935
			 * buffer's content lock, since xmin never changes in an existing
			 * tuple.)
			 */
			if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
									 priorXmax))
			{
				ReleaseBuffer(buffer);
				return NULL;
			}
1936

1937
			/* otherwise xmin should not be dirty... */
1938
			if (TransactionIdIsValid(SnapshotDirty->xmin))
1939
				elog(ERROR, "t_xmin is uncommitted in tuple to be updated");
1940 1941

			/*
B
Bruce Momjian 已提交
1942 1943
			 * If tuple is being updated by other transaction then we have to
			 * wait for its commit/abort.
1944
			 */
1945
			if (TransactionIdIsValid(SnapshotDirty->xmax))
1946 1947
			{
				ReleaseBuffer(buffer);
1948 1949
				XactLockTableWait(SnapshotDirty->xmax);
				continue;		/* loop back to repeat heap_fetch */
1950 1951
			}

1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969
			/*
			 * If tuple was inserted by our own transaction, we have to check
			 * cmin against curCid: cmin >= curCid means our command cannot
			 * see the tuple, so we should ignore it.  Without this we are
			 * open to the "Halloween problem" of indefinitely re-updating
			 * the same tuple.  (We need not check cmax because
			 * HeapTupleSatisfiesDirty will consider a tuple deleted by
			 * our transaction dead, regardless of cmax.)  We just checked
			 * that priorXmax == xmin, so we can test that variable instead
			 * of doing HeapTupleHeaderGetXmin again.
			 */
			if (TransactionIdIsCurrentTransactionId(priorXmax) &&
				HeapTupleHeaderGetCmin(tuple.t_data) >= curCid)
			{
				ReleaseBuffer(buffer);
				return NULL;
			}

1970 1971 1972 1973 1974 1975 1976 1977 1978
			/*
			 * We got tuple - now copy it for use by recheck query.
			 */
			copyTuple = heap_copytuple(&tuple);
			ReleaseBuffer(buffer);
			break;
		}

		/*
B
Bruce Momjian 已提交
1979 1980
		 * If the referenced slot was actually empty, the latest version of
		 * the row must have been deleted, so we need do nothing.
1981
		 */
1982
		if (tuple.t_data == NULL)
1983
		{
1984 1985
			ReleaseBuffer(buffer);
			return NULL;
1986 1987 1988
		}

		/*
1989
		 * As above, if xmin isn't what we're expecting, do nothing.
1990
		 */
1991 1992 1993 1994 1995 1996 1997 1998 1999
		if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
								 priorXmax))
		{
			ReleaseBuffer(buffer);
			return NULL;
		}

		/*
		 * If we get here, the tuple was found but failed SnapshotDirty.
B
Bruce Momjian 已提交
2000 2001 2002 2003 2004 2005
		 * Assuming the xmin is either a committed xact or our own xact (as it
		 * certainly should be if we're trying to modify the tuple), this must
		 * mean that the row was updated or deleted by either a committed xact
		 * or our own xact.  If it was deleted, we can ignore it; if it was
		 * updated then chain up to the next version and repeat the whole
		 * test.
2006
		 *
B
Bruce Momjian 已提交
2007 2008
		 * As above, it should be safe to examine xmax and t_ctid without the
		 * buffer content lock, because they can't be changing.
2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022
		 */
		if (ItemPointerEquals(&tuple.t_self, &tuple.t_data->t_ctid))
		{
			/* deleted, so forget about it */
			ReleaseBuffer(buffer);
			return NULL;
		}

		/* updated, so look at the updated row */
		tuple.t_self = tuple.t_data->t_ctid;
		/* updated row should have xmin matching this xmax */
		priorXmax = HeapTupleHeaderGetXmax(tuple.t_data);
		ReleaseBuffer(buffer);
		/* loop back to fetch next in chain */
2023 2024 2025
	}

	/*
B
Bruce Momjian 已提交
2026 2027
	 * For UPDATE/DELETE we have to return tid of actual row we're executing
	 * PQ for.
2028 2029 2030 2031
	 */
	*tid = tuple.t_self;

	/*
2032
	 * Need to run a recheck subquery.	Find or create a PQ stack entry.
2033
	 */
2034
	epq = estate->es_evalPlanQual;
2035 2036
	endNode = true;

2037 2038
	if (epq != NULL && epq->rti == 0)
	{
2039
		/* Top PQ stack entry is idle, so re-use it */
2040
		Assert(!(estate->es_useEvalPlan) && epq->next == NULL);
2041 2042 2043 2044 2045
		epq->rti = rti;
		endNode = false;
	}

	/*
B
Bruce Momjian 已提交
2046 2047 2048 2049
	 * If this is request for another RTE - Ra, - then we have to check wasn't
	 * PlanQual requested for Ra already and if so then Ra' row was updated
	 * again and we have to re-start old execution for Ra and forget all what
	 * we done after Ra was suspended. Cool? -:))
2050
	 */
B
Bruce Momjian 已提交
2051
	if (epq != NULL && epq->rti != rti &&
2052
		epq->estate->es_evTuple[rti - 1] != NULL)
2053 2054 2055
	{
		do
		{
2056 2057
			evalPlanQual *oldepq;

2058
			/* stop execution */
2059 2060 2061 2062
			EvalPlanQualStop(epq);
			/* pop previous PlanQual from the stack */
			oldepq = epq->next;
			Assert(oldepq && oldepq->rti != 0);
2063 2064 2065
			/* push current PQ to freePQ stack */
			oldepq->free = epq;
			epq = oldepq;
2066
			estate->es_evalPlanQual = epq;
2067 2068 2069
		} while (epq->rti != rti);
	}

B
Bruce Momjian 已提交
2070
	/*
B
Bruce Momjian 已提交
2071 2072
	 * If we are requested for another RTE then we have to suspend execution
	 * of current PlanQual and start execution for new one.
2073 2074 2075 2076
	 */
	if (epq == NULL || epq->rti != rti)
	{
		/* try to reuse plan used previously */
B
Bruce Momjian 已提交
2077
		evalPlanQual *newepq = (epq != NULL) ? epq->free : NULL;
2078

2079
		if (newepq == NULL)		/* first call or freePQ stack is empty */
2080
		{
2081
			newepq = (evalPlanQual *) palloc0(sizeof(evalPlanQual));
2082
			newepq->free = NULL;
2083 2084
			newepq->estate = NULL;
			newepq->planstate = NULL;
2085 2086
		}
		else
2087
		{
2088 2089 2090
			/* recycle previously used PlanQual */
			Assert(newepq->estate == NULL);
			epq->free = NULL;
2091
		}
2092
		/* push current PQ to the stack */
2093
		newepq->next = epq;
2094
		epq = newepq;
2095
		estate->es_evalPlanQual = epq;
2096 2097 2098 2099
		epq->rti = rti;
		endNode = false;
	}

2100
	Assert(epq->rti == rti);
2101 2102

	/*
B
Bruce Momjian 已提交
2103 2104 2105 2106 2107 2108
	 * Ok - we're requested for the same RTE.  Unfortunately we still have to
	 * end and restart execution of the plan, because ExecReScan wouldn't
	 * ensure that upper plan nodes would reset themselves.  We could make
	 * that work if insertion of the target tuple were integrated with the
	 * Param mechanism somehow, so that the upper plan nodes know that their
	 * children's outputs have changed.
2109
	 *
B
Bruce Momjian 已提交
2110 2111
	 * Note that the stack of free evalPlanQual nodes is quite useless at the
	 * moment, since it only saves us from pallocing/releasing the
B
Bruce Momjian 已提交
2112 2113
	 * evalPlanQual nodes themselves.  But it will be useful once we implement
	 * ReScan instead of end/restart for re-using PlanQual nodes.
2114 2115
	 */
	if (endNode)
2116
	{
2117
		/* stop execution */
2118
		EvalPlanQualStop(epq);
2119
	}
2120

2121 2122 2123
	/*
	 * Initialize new recheck query.
	 *
B
Bruce Momjian 已提交
2124 2125
	 * Note: if we were re-using PlanQual plans via ExecReScan, we'd need to
	 * instead copy down changeable state from the top plan (including
B
Bruce Momjian 已提交
2126 2127
	 * es_result_relation_info, es_junkFilter) and reset locally changeable
	 * state in the epq (including es_param_exec_vals, es_evTupleNull).
2128 2129 2130
	 */
	EvalPlanQualStart(epq, estate, epq->next);

2131
	/*
B
Bruce Momjian 已提交
2132 2133
	 * free old RTE' tuple, if any, and store target tuple where relation's
	 * scan node will see it
2134
	 */
2135
	epqstate = epq->estate;
2136 2137 2138
	if (epqstate->es_evTuple[rti - 1] != NULL)
		heap_freetuple(epqstate->es_evTuple[rti - 1]);
	epqstate->es_evTuple[rti - 1] = copyTuple;
2139

2140
	return EvalPlanQualNext(estate);
2141 2142
}

B
Bruce Momjian 已提交
2143
static TupleTableSlot *
2144 2145
EvalPlanQualNext(EState *estate)
{
2146 2147
	evalPlanQual *epq = estate->es_evalPlanQual;
	MemoryContext oldcontext;
B
Bruce Momjian 已提交
2148
	TupleTableSlot *slot;
2149 2150 2151 2152

	Assert(epq->rti != 0);

lpqnext:;
2153
	oldcontext = MemoryContextSwitchTo(epq->estate->es_query_cxt);
2154
	slot = ExecProcNode(epq->planstate);
2155
	MemoryContextSwitchTo(oldcontext);
2156 2157 2158 2159 2160 2161

	/*
	 * No more tuples for this PQ. Continue previous one.
	 */
	if (TupIsNull(slot))
	{
2162 2163
		evalPlanQual *oldepq;

2164
		/* stop execution */
2165
		EvalPlanQualStop(epq);
2166
		/* pop old PQ from the stack */
2167 2168
		oldepq = epq->next;
		if (oldepq == NULL)
2169
		{
2170 2171 2172 2173
			/* this is the first (oldest) PQ - mark as free */
			epq->rti = 0;
			estate->es_useEvalPlan = false;
			/* and continue Query execution */
2174
			return NULL;
2175 2176 2177 2178 2179
		}
		Assert(oldepq->rti != 0);
		/* push current PQ to freePQ stack */
		oldepq->free = epq;
		epq = oldepq;
2180
		estate->es_evalPlanQual = epq;
2181 2182 2183
		goto lpqnext;
	}

2184
	return slot;
2185
}
2186 2187 2188 2189

/*
 * EndEvalPlanQual --- shut down all active PlanQual recheck levels.
 *
 * Pops every entry off the PQ stack, stopping each one's execution, and
 * leaves the bottom entry marked free (rti == 0).  A no-op if the plans
 * are already shut down.
 */
static void
EndEvalPlanQual(EState *estate)
{
	evalPlanQual *epq = estate->es_evalPlanQual;

	if (epq->rti == 0)			/* plans already shutdowned */
	{
		Assert(epq->next == NULL);
		return;
	}

	for (;;)
	{
		evalPlanQual *oldepq;

		/* stop execution */
		EvalPlanQualStop(epq);
		/* pop old PQ from the stack */
		oldepq = epq->next;
		if (oldepq == NULL)
		{
			/* this is the first (oldest) PQ - mark as free */
			epq->rti = 0;
			estate->es_useEvalPlan = false;
			break;
		}
		Assert(oldepq->rti != 0);
		/* push current PQ to freePQ stack */
		oldepq->free = epq;
		epq = oldepq;
		estate->es_evalPlanQual = epq;
	}
}

/*
 * Start execution of one level of PlanQual.
 *
 * This is a cut-down version of ExecutorStart(): we copy some state from
 * the top-level estate rather than initializing it fresh.
 */
static void
EvalPlanQualStart(evalPlanQual *epq, EState *estate, evalPlanQual *priorepq)
{
	EState	   *epqstate;
	int			rtsize;
	MemoryContext oldcontext;

	rtsize = list_length(estate->es_range_table);

	epq->estate = epqstate = CreateExecutorState();

	oldcontext = MemoryContextSwitchTo(epqstate->es_query_cxt);

	/*
	 * The epqstates share the top query's copy of unchanging state such as
	 * the snapshot, rangetable, result-rel info, and external Param info.
	 * They need their own copies of local state, including a tuple table,
	 * es_param_exec_vals, etc.
	 */
	epqstate->es_direction = ForwardScanDirection;
	epqstate->es_snapshot = estate->es_snapshot;
	epqstate->es_crosscheck_snapshot = estate->es_crosscheck_snapshot;
	epqstate->es_range_table = estate->es_range_table;
	epqstate->es_result_relations = estate->es_result_relations;
	epqstate->es_num_result_relations = estate->es_num_result_relations;
	epqstate->es_result_relation_info = estate->es_result_relation_info;
	epqstate->es_junkFilter = estate->es_junkFilter;
	epqstate->es_into_relation_descriptor = estate->es_into_relation_descriptor;
	epqstate->es_into_relation_use_wal = estate->es_into_relation_use_wal;
	epqstate->es_param_list_info = estate->es_param_list_info;
	if (estate->es_topPlan->nParamExec > 0)
		epqstate->es_param_exec_vals = (ParamExecData *)
			palloc0(estate->es_topPlan->nParamExec * sizeof(ParamExecData));
	epqstate->es_rowMarks = estate->es_rowMarks;
	epqstate->es_forUpdate = estate->es_forUpdate;
	epqstate->es_rowNoWait = estate->es_rowNoWait;
	epqstate->es_instrument = estate->es_instrument;
	epqstate->es_select_into = estate->es_select_into;
	epqstate->es_into_oids = estate->es_into_oids;
	epqstate->es_topPlan = estate->es_topPlan;

	/*
	 * Each epqstate must have its own es_evTupleNull state, but all the stack
	 * entries share es_evTuple state.	This allows sub-rechecks to inherit
	 * the value being examined by an outer recheck.
	 */
	epqstate->es_evTupleNull = (bool *) palloc0(rtsize * sizeof(bool));
	if (priorepq == NULL)
		/* first PQ stack entry */
		epqstate->es_evTuple = (HeapTuple *)
			palloc0(rtsize * sizeof(HeapTuple));
	else
		/* later stack entries share the same storage */
		epqstate->es_evTuple = priorepq->estate->es_evTuple;

	epqstate->es_tupleTable =
		ExecCreateTupleTable(estate->es_tupleTable->size);

	epq->planstate = ExecInitNode(estate->es_topPlan, epqstate, 0);

	MemoryContextSwitchTo(oldcontext);
}

/*
 * End execution of one level of PlanQual.
 *
 * This is a cut-down version of ExecutorEnd(); basically we want to do most
 * of the normal cleanup, but *not* close result relations (which we are
 * just sharing from the outer query).
 */
static void
EvalPlanQualStop(evalPlanQual *epq)
{
	EState	   *epqstate = epq->estate;
	MemoryContext oldcontext;

	oldcontext = MemoryContextSwitchTo(epqstate->es_query_cxt);

	ExecEndNode(epq->planstate);

	ExecDropTupleTable(epqstate->es_tupleTable, true);
	epqstate->es_tupleTable = NULL;

	/* release this level's test tuple, if it still holds one */
	if (epqstate->es_evTuple[epq->rti - 1] != NULL)
	{
		heap_freetuple(epqstate->es_evTuple[epq->rti - 1]);
		epqstate->es_evTuple[epq->rti - 1] = NULL;
	}

	MemoryContextSwitchTo(oldcontext);

	FreeExecutorState(epqstate);

	epq->estate = NULL;
	epq->planstate = NULL;
}