execMain.c 62.5 KB
Newer Older
1 2
/*-------------------------------------------------------------------------
 *
3
 * execMain.c
4
 *	  top level executor interface routines
5 6
 *
 * INTERFACE ROUTINES
7 8 9
 *	ExecutorStart()
 *	ExecutorRun()
 *	ExecutorEnd()
10
 *
11 12 13 14
 *	The old ExecutorMain() has been replaced by ExecutorStart(),
 *	ExecutorRun() and ExecutorEnd()
 *
 *	These three procedures are the external interfaces to the executor.
15
 *	In each case, the query descriptor is required as an argument.
16
 *
17
 *	ExecutorStart() must be called at the beginning of execution of any
18 19 20
 *	query plan and ExecutorEnd() should always be called at the end of
 *	execution of a plan.
 *
21
 *	ExecutorRun accepts direction and count arguments that specify whether
22
 *	the plan is to be executed forwards, backwards, and for how many tuples.
23
 *
P
 
PostgreSQL Daemon 已提交
24
 * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
B
Add:  
Bruce Momjian 已提交
25
 * Portions Copyright (c) 1994, Regents of the University of California
26 27 28
 *
 *
 * IDENTIFICATION
29
 *	  $PostgreSQL: pgsql/src/backend/executor/execMain.c,v 1.267 2006/02/21 23:01:54 neilc Exp $
30 31 32
 *
 *-------------------------------------------------------------------------
 */
33 34
#include "postgres.h"

35
#include "access/heapam.h"
36
#include "access/xlog.h"
37
#include "catalog/heap.h"
38
#include "catalog/namespace.h"
39
#include "commands/tablecmds.h"
40
#include "commands/tablespace.h"
41
#include "commands/trigger.h"
B
Bruce Momjian 已提交
42 43
#include "executor/execdebug.h"
#include "executor/execdefs.h"
44
#include "executor/instrument.h"
B
Bruce Momjian 已提交
45
#include "miscadmin.h"
46
#include "optimizer/clauses.h"
B
Bruce Momjian 已提交
47 48
#include "optimizer/var.h"
#include "parser/parsetree.h"
49
#include "storage/smgr.h"
B
Bruce Momjian 已提交
50
#include "utils/acl.h"
51
#include "utils/guc.h"
52
#include "utils/lsyscache.h"
53
#include "utils/memutils.h"
54

55

56 57 58 59 60 61 62 63 64
typedef struct evalPlanQual
{
	Index		rti;
	EState	   *estate;
	PlanState  *planstate;
	struct evalPlanQual *next;	/* stack of active PlanQual plans */
	struct evalPlanQual *free;	/* list of free PlanQual plans */
} evalPlanQual;

65
/* decls for local routines only used within this module */
66
static void InitPlan(QueryDesc *queryDesc, bool explainOnly);
67
static void initResultRelInfo(ResultRelInfo *resultRelInfo,
B
Bruce Momjian 已提交
68 69
				  Index resultRelationIndex,
				  List *rangeTable,
70 71
				  CmdType operation,
				  bool doInstrument);
72
static TupleTableSlot *ExecutePlan(EState *estate, PlanState *planstate,
B
Bruce Momjian 已提交
73 74 75
			CmdType operation,
			long numberTuples,
			ScanDirection direction,
76
			DestReceiver *dest);
77
static void ExecSelect(TupleTableSlot *slot,
78
		   DestReceiver *dest,
B
Bruce Momjian 已提交
79
		   EState *estate);
80
static void ExecInsert(TupleTableSlot *slot, ItemPointer tupleid,
B
Bruce Momjian 已提交
81
		   EState *estate);
82
static void ExecDelete(TupleTableSlot *slot, ItemPointer tupleid,
B
Bruce Momjian 已提交
83
		   EState *estate);
84
static void ExecUpdate(TupleTableSlot *slot, ItemPointer tupleid,
B
Bruce Momjian 已提交
85
		   EState *estate);
86
static TupleTableSlot *EvalPlanQualNext(EState *estate);
87
static void EndEvalPlanQual(EState *estate);
88 89
static void ExecCheckRTEPerms(RangeTblEntry *rte);
static void ExecCheckXactReadOnly(Query *parsetree);
90
static void EvalPlanQualStart(evalPlanQual *epq, EState *estate,
B
Bruce Momjian 已提交
91
				  evalPlanQual *priorepq);
92
static void EvalPlanQualStop(evalPlanQual *epq);
93

94 95
/* end of local decls */

96

97
/* ----------------------------------------------------------------
98 99 100 101 102
 *		ExecutorStart
 *
 *		This routine must be called at the beginning of any execution of any
 *		query plan
 *
103
 * Takes a QueryDesc previously created by CreateQueryDesc (it's not real
B
Bruce Momjian 已提交
104
 * clear why we bother to separate the two functions, but...).	The tupDesc
105 106
 * field of the QueryDesc is filled in to describe the tuples that will be
 * returned, and the internal fields (estate and planstate) are set up.
107
 *
108 109 110
 * If explainOnly is true, we are not actually intending to run the plan,
 * only to set up for EXPLAIN; so skip unwanted side-effects.
 *
111 112
 * NB: the CurrentMemoryContext when this is called will become the parent
 * of the per-query context used for this Executor invocation.
113 114
 * ----------------------------------------------------------------
 */
115
void
116
ExecutorStart(QueryDesc *queryDesc, bool explainOnly)
117
{
118
	EState	   *estate;
119
	MemoryContext oldcontext;
120

121
	/* sanity checks: queryDesc must not be started already */
122
	Assert(queryDesc != NULL);
123 124
	Assert(queryDesc->estate == NULL);

125
	/*
B
Bruce Momjian 已提交
126 127
	 * If the transaction is read-only, we need to check if any writes are
	 * planned to non-temporary tables.
128
	 */
129 130
	if (XactReadOnly && !explainOnly)
		ExecCheckXactReadOnly(queryDesc->parsetree);
131

132
	/*
133
	 * Build EState, switch into per-query memory context for startup.
134 135 136 137
	 */
	estate = CreateExecutorState();
	queryDesc->estate = estate;

138 139 140 141 142
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	/*
	 * Fill in parameters, if any, from queryDesc
	 */
143
	estate->es_param_list_info = queryDesc->params;
144

V
Vadim B. Mikheev 已提交
145
	if (queryDesc->plantree->nParamExec > 0)
146
		estate->es_param_exec_vals = (ParamExecData *)
147
			palloc0(queryDesc->plantree->nParamExec * sizeof(ParamExecData));
148

149
	/*
150
	 * Copy other important information into the EState
151
	 */
152 153 154
	estate->es_snapshot = queryDesc->snapshot;
	estate->es_crosscheck_snapshot = queryDesc->crosscheck_snapshot;
	estate->es_instrument = queryDesc->doInstrument;
155

156
	/*
157
	 * Initialize the plan state tree
158
	 */
159
	InitPlan(queryDesc, explainOnly);
160 161

	MemoryContextSwitchTo(oldcontext);
162 163 164
}

/* ----------------------------------------------------------------
165 166 167 168 169 170 171
 *		ExecutorRun
 *
 *		This is the main routine of the executor module. It accepts
 *		the query descriptor from the traffic cop and executes the
 *		query plan.
 *
 *		ExecutorStart must have been called already.
172
 *
173 174 175
 *		If direction is NoMovementScanDirection then nothing is done
 *		except to start up/shut down the destination.  Otherwise,
 *		we retrieve up to 'count' tuples in the specified direction.
176
 *
177
 *		Note: count = 0 is interpreted as no portal limit, i.e., run to
178
 *		completion.
179
 *
180 181
 * ----------------------------------------------------------------
 */
182
TupleTableSlot *
183
ExecutorRun(QueryDesc *queryDesc,
184
			ScanDirection direction, long count)
185
{
186
	EState	   *estate;
187
	CmdType		operation;
188
	DestReceiver *dest;
189
	TupleTableSlot *result;
190 191 192 193 194 195 196 197
	MemoryContext oldcontext;

	/* sanity checks */
	Assert(queryDesc != NULL);

	estate = queryDesc->estate;

	Assert(estate != NULL);
198

B
Bruce Momjian 已提交
199
	/*
200
	 * Switch into per-query memory context
201
	 */
202
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
203

B
Bruce Momjian 已提交
204
	/*
B
Bruce Momjian 已提交
205
	 * extract information from the query descriptor and the query feature.
206
	 */
207 208 209
	operation = queryDesc->operation;
	dest = queryDesc->dest;

B
Bruce Momjian 已提交
210
	/*
211
	 * startup tuple receiver
212
	 */
213 214
	estate->es_processed = 0;
	estate->es_lastoid = InvalidOid;
215

216
	(*dest->rStartup) (dest, operation, queryDesc->tupDesc);
217

218 219 220
	/*
	 * run plan
	 */
221
	if (ScanDirectionIsNoMovement(direction))
222 223 224
		result = NULL;
	else
		result = ExecutePlan(estate,
225
							 queryDesc->planstate,
226 227 228
							 operation,
							 count,
							 direction,
229
							 dest);
230

231 232 233
	/*
	 * shutdown receiver
	 */
234
	(*dest->rShutdown) (dest);
235

236 237
	MemoryContextSwitchTo(oldcontext);

238
	return result;
239 240 241
}

/* ----------------------------------------------------------------
242 243
 *		ExecutorEnd
 *
244
 *		This routine must be called at the end of execution of any
245
 *		query plan
246 247 248
 * ----------------------------------------------------------------
 */
void
249
ExecutorEnd(QueryDesc *queryDesc)
250
{
251
	EState	   *estate;
252
	MemoryContext oldcontext;
253

254 255
	/* sanity checks */
	Assert(queryDesc != NULL);
256

257 258
	estate = queryDesc->estate;

259
	Assert(estate != NULL);
260

261
	/*
262
	 * Switch into per-query memory context to run ExecEndPlan
263
	 */
264 265 266
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	ExecEndPlan(queryDesc->planstate, estate);
267

268
	/*
269
	 * Must switch out of context before destroying it
270
	 */
271
	MemoryContextSwitchTo(oldcontext);
272

273
	/*
274 275
	 * Release EState and per-query memory context.  This should release
	 * everything the executor has allocated.
276
	 */
277 278 279 280 281 282
	FreeExecutorState(estate);

	/* Reset queryDesc fields that no longer point to anything */
	queryDesc->tupDesc = NULL;
	queryDesc->estate = NULL;
	queryDesc->planstate = NULL;
283
}
284

285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320
/* ----------------------------------------------------------------
 *		ExecutorRewind
 *
 *		This routine may be called on an open queryDesc to rewind it
 *		to the start.
 * ----------------------------------------------------------------
 */
void
ExecutorRewind(QueryDesc *queryDesc)
{
	EState	   *estate;
	MemoryContext oldcontext;

	/* sanity checks */
	Assert(queryDesc != NULL);

	estate = queryDesc->estate;

	Assert(estate != NULL);

	/* It's probably not sensible to rescan updating queries */
	Assert(queryDesc->operation == CMD_SELECT);

	/*
	 * Switch into per-query memory context
	 */
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	/*
	 * rescan plan
	 */
	ExecReScan(queryDesc->planstate, NULL);

	MemoryContextSwitchTo(oldcontext);
}

321

322 323 324 325
/*
 * ExecCheckRTPerms
 *		Check access permissions for all relations listed in a range table.
 */
326
void
327
ExecCheckRTPerms(List *rangeTable)
328
{
329
	ListCell   *l;
330

331
	foreach(l, rangeTable)
332
	{
333
		RangeTblEntry *rte = lfirst(l);
334

335
		ExecCheckRTEPerms(rte);
336 337 338 339 340 341 342 343
	}
}

/*
 * ExecCheckRTEPerms
 *		Check access permissions for a single RTE.
 */
static void
344
ExecCheckRTEPerms(RangeTblEntry *rte)
345
{
346
	AclMode		requiredPerms;
347
	Oid			relOid;
B
Bruce Momjian 已提交
348
	Oid			userid;
349

B
Bruce Momjian 已提交
350
	/*
B
Bruce Momjian 已提交
351 352 353 354 355 356
	 * Only plain-relation RTEs need to be checked here.  Subquery RTEs are
	 * checked by ExecInitSubqueryScan if the subquery is still a separate
	 * subquery --- if it's been pulled up into our query level then the RTEs
	 * are in our rangetable and will be checked here. Function RTEs are
	 * checked by init_fcache when the function is prepared for execution.
	 * Join and special RTEs need no checks.
B
Bruce Momjian 已提交
357
	 */
358
	if (rte->rtekind != RTE_RELATION)
359 360
		return;

361 362 363 364 365 366 367
	/*
	 * No work if requiredPerms is empty.
	 */
	requiredPerms = rte->requiredPerms;
	if (requiredPerms == 0)
		return;

368
	relOid = rte->relid;
369 370

	/*
B
Bruce Momjian 已提交
371
	 * userid to check as: current user unless we have a setuid indication.
372
	 *
373 374 375 376
	 * Note: GetUserId() is presently fast enough that there's no harm in
	 * calling it separately for each RTE.	If that stops being true, we could
	 * call it once in ExecCheckRTPerms and pass the userid down from there.
	 * But for now, no need for the extra clutter.
377
	 */
378
	userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
379

380
	/*
B
Bruce Momjian 已提交
381
	 * We must have *all* the requiredPerms bits, so use aclmask not aclcheck.
382
	 */
383 384 385 386
	if (pg_class_aclmask(relOid, userid, requiredPerms, ACLMASK_ALL)
		!= requiredPerms)
		aclcheck_error(ACLCHECK_NO_PRIV, ACL_KIND_CLASS,
					   get_rel_name(relOid));
387 388
}

389 390 391
/*
 * Check that the query does not imply any writes to non-temp tables.
 */
392
static void
393
ExecCheckXactReadOnly(Query *parsetree)
394
{
395
	ListCell   *l;
396

397 398 399 400 401 402
	/*
	 * CREATE TABLE AS or SELECT INTO?
	 *
	 * XXX should we allow this if the destination is temp?
	 */
	if (parsetree->into != NULL)
403 404
		goto fail;

405
	/* Fail if write permissions are requested on any non-temp table */
406
	foreach(l, parsetree->rtable)
407
	{
408
		RangeTblEntry *rte = lfirst(l);
409

410
		if (rte->rtekind == RTE_SUBQUERY)
411
		{
412 413 414
			ExecCheckXactReadOnly(rte->subquery);
			continue;
		}
415

416 417
		if (rte->rtekind != RTE_RELATION)
			continue;
418

419 420
		if ((rte->requiredPerms & (~ACL_SELECT)) == 0)
			continue;
421

422 423
		if (isTempNamespace(get_rel_namespace(rte->relid)))
			continue;
424

425
		goto fail;
426 427 428 429 430
	}

	return;

fail:
431 432 433
	ereport(ERROR,
			(errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION),
			 errmsg("transaction is read-only")));
434 435 436
}


437
/* ----------------------------------------------------------------
438 439 440 441
 *		InitPlan
 *
 *		Initializes the query plan: open files, allocate storage
 *		and start up the rule manager
442 443
 * ----------------------------------------------------------------
 */
444
static void
445
InitPlan(QueryDesc *queryDesc, bool explainOnly)
446
{
447
	CmdType		operation = queryDesc->operation;
B
Bruce Momjian 已提交
448 449 450
	Query	   *parseTree = queryDesc->parsetree;
	Plan	   *plan = queryDesc->plantree;
	EState	   *estate = queryDesc->estate;
451
	PlanState  *planstate;
B
Bruce Momjian 已提交
452 453
	List	   *rangeTable;
	Relation	intoRelationDesc;
454
	bool		do_select_into;
B
Bruce Momjian 已提交
455
	TupleDesc	tupType;
456

457
	/*
B
Bruce Momjian 已提交
458 459
	 * Do permissions checks.  It's sufficient to examine the query's top
	 * rangetable here --- subplan RTEs will be checked during
460
	 * ExecInitSubPlan().
461
	 */
462
	ExecCheckRTPerms(parseTree->rtable);
463

B
Bruce Momjian 已提交
464
	/*
B
Bruce Momjian 已提交
465
	 * get information from query descriptor
466
	 */
467
	rangeTable = parseTree->rtable;
468

B
Bruce Momjian 已提交
469
	/*
B
Bruce Momjian 已提交
470
	 * initialize the node's execution state
471
	 */
472 473
	estate->es_range_table = rangeTable;

B
Bruce Momjian 已提交
474
	/*
475
	 * if there is a result relation, initialize result relation stuff
476
	 */
477
	if (parseTree->resultRelation != 0 && operation != CMD_SELECT)
478
	{
479 480 481
		List	   *resultRelations = parseTree->resultRelations;
		int			numResultRelations;
		ResultRelInfo *resultRelInfos;
B
Bruce Momjian 已提交
482

483 484 485 486 487 488
		if (resultRelations != NIL)
		{
			/*
			 * Multiple result relations (due to inheritance)
			 * parseTree->resultRelations identifies them all
			 */
B
Bruce Momjian 已提交
489 490
			ResultRelInfo *resultRelInfo;
			ListCell   *l;
491

492
			numResultRelations = list_length(resultRelations);
493 494 495
			resultRelInfos = (ResultRelInfo *)
				palloc(numResultRelations * sizeof(ResultRelInfo));
			resultRelInfo = resultRelInfos;
496
			foreach(l, resultRelations)
497 498
			{
				initResultRelInfo(resultRelInfo,
499
								  lfirst_int(l),
500
								  rangeTable,
501 502
								  operation,
								  estate->es_instrument);
503 504 505 506 507 508
				resultRelInfo++;
			}
		}
		else
		{
			/*
B
Bruce Momjian 已提交
509
			 * Single result relation identified by parseTree->resultRelation
510 511 512 513 514 515
			 */
			numResultRelations = 1;
			resultRelInfos = (ResultRelInfo *) palloc(sizeof(ResultRelInfo));
			initResultRelInfo(resultRelInfos,
							  parseTree->resultRelation,
							  rangeTable,
516 517
							  operation,
							  estate->es_instrument);
518
		}
519

520 521 522 523
		estate->es_result_relations = resultRelInfos;
		estate->es_num_result_relations = numResultRelations;
		/* Initialize to first or only result rel */
		estate->es_result_relation_info = resultRelInfos;
524
	}
525 526
	else
	{
B
Bruce Momjian 已提交
527
		/*
B
Bruce Momjian 已提交
528
		 * if no result relation, then set state appropriately
529
		 */
530 531
		estate->es_result_relations = NULL;
		estate->es_num_result_relations = 0;
532 533 534
		estate->es_result_relation_info = NULL;
	}

535
	/*
T
Tom Lane 已提交
536
	 * Detect whether we're doing SELECT INTO.  If so, set the es_into_oids
B
Bruce Momjian 已提交
537 538
	 * flag appropriately so that the plan tree will be initialized with the
	 * correct tuple descriptors.
539 540 541
	 */
	do_select_into = false;

542
	if (operation == CMD_SELECT && parseTree->into != NULL)
543 544
	{
		do_select_into = true;
545 546
		estate->es_select_into = true;
		estate->es_into_oids = parseTree->intoHasOids;
547 548
	}

549
	/*
550
	 * Have to lock relations selected FOR UPDATE/FOR SHARE
551
	 */
552
	estate->es_rowMarks = NIL;
553
	estate->es_forUpdate = parseTree->forUpdate;
554
	estate->es_rowNoWait = parseTree->rowNoWait;
555
	if (parseTree->rowMarks != NIL)
556
	{
557
		ListCell   *l;
558

559
		foreach(l, parseTree->rowMarks)
560
		{
561
			Index		rti = lfirst_int(l);
562
			Oid			relid = getrelid(rti, rangeTable);
563
			Relation	relation;
564
			ExecRowMark *erm;
565 566

			relation = heap_open(relid, RowShareLock);
567
			erm = (ExecRowMark *) palloc(sizeof(ExecRowMark));
568
			erm->relation = relation;
569
			erm->rti = rti;
570
			snprintf(erm->resname, sizeof(erm->resname), "ctid%u", rti);
571
			estate->es_rowMarks = lappend(estate->es_rowMarks, erm);
572 573
		}
	}
574

B
Bruce Momjian 已提交
575
	/*
B
Bruce Momjian 已提交
576 577 578
	 * initialize the executor "tuple" table.  We need slots for all the plan
	 * nodes, plus possibly output slots for the junkfilter(s). At this point
	 * we aren't sure if we need junkfilters, so just add slots for them
579 580
	 * unconditionally.  Also, if it's not a SELECT, set up a slot for use for
	 * trigger output tuples.
581 582
	 */
	{
583
		int			nSlots = ExecCountSlotsNode(plan);
584

585
		if (parseTree->resultRelations != NIL)
586
			nSlots += list_length(parseTree->resultRelations);
587 588
		else
			nSlots += 1;
589 590 591
		if (operation != CMD_SELECT)
			nSlots++;

592
		estate->es_tupleTable = ExecCreateTupleTable(nSlots);
593 594 595 596

		if (operation != CMD_SELECT)
			estate->es_trig_tuple_slot =
				ExecAllocTableSlot(estate->es_tupleTable);
597
	}
598

599
	/* mark EvalPlanQual not active */
600
	estate->es_topPlan = plan;
601 602
	estate->es_evalPlanQual = NULL;
	estate->es_evTupleNull = NULL;
603
	estate->es_evTuple = NULL;
604 605
	estate->es_useEvalPlan = false;

B
Bruce Momjian 已提交
606
	/*
B
Bruce Momjian 已提交
607 608 609
	 * initialize the private state information for all the nodes in the query
	 * tree.  This opens files, allocates storage and leaves us ready to start
	 * processing tuples.
610
	 */
611
	planstate = ExecInitNode(plan, estate);
612

B
Bruce Momjian 已提交
613
	/*
B
Bruce Momjian 已提交
614 615 616
	 * Get the tuple descriptor describing the type of tuples to return. (this
	 * is especially important if we are creating a relation with "SELECT
	 * INTO")
617
	 */
618
	tupType = ExecGetResultType(planstate);
619

B
Bruce Momjian 已提交
620
	/*
B
Bruce Momjian 已提交
621 622 623 624 625 626
	 * Initialize the junk filter if needed.  SELECT and INSERT queries need a
	 * filter if there are any junk attrs in the tlist.  INSERT and SELECT
	 * INTO also need a filter if the plan may return raw disk tuples (else
	 * heap_insert will be scribbling on the source relation!). UPDATE and
	 * DELETE always need a filter, since there's always a junk 'ctid'
	 * attribute present --- no need to look first.
627 628
	 */
	{
629
		bool		junk_filter_needed = false;
630
		ListCell   *tlist;
631

632
		switch (operation)
633
		{
634 635
			case CMD_SELECT:
			case CMD_INSERT:
636
				foreach(tlist, plan->targetlist)
637
				{
638 639
					TargetEntry *tle = (TargetEntry *) lfirst(tlist);

640
					if (tle->resjunk)
641 642 643 644
					{
						junk_filter_needed = true;
						break;
					}
645
				}
646
				if (!junk_filter_needed &&
647 648 649
					(operation == CMD_INSERT || do_select_into) &&
					ExecMayReturnRawTuples(planstate))
					junk_filter_needed = true;
650 651 652 653 654 655 656
				break;
			case CMD_UPDATE:
			case CMD_DELETE:
				junk_filter_needed = true;
				break;
			default:
				break;
657 658
		}

659
		if (junk_filter_needed)
660
		{
661
			/*
B
Bruce Momjian 已提交
662 663 664
			 * If there are multiple result relations, each one needs its own
			 * junk filter.  Note this is only possible for UPDATE/DELETE, so
			 * we can't be fooled by some needing a filter and some not.
665 666 667
			 */
			if (parseTree->resultRelations != NIL)
			{
668 669
				PlanState **appendplans;
				int			as_nplans;
670
				ResultRelInfo *resultRelInfo;
671
				int			i;
672 673 674 675

				/* Top plan had better be an Append here. */
				Assert(IsA(plan, Append));
				Assert(((Append *) plan)->isTarget);
676 677 678 679
				Assert(IsA(planstate, AppendState));
				appendplans = ((AppendState *) planstate)->appendplans;
				as_nplans = ((AppendState *) planstate)->as_nplans;
				Assert(as_nplans == estate->es_num_result_relations);
680
				resultRelInfo = estate->es_result_relations;
681
				for (i = 0; i < as_nplans; i++)
682
				{
683
					PlanState  *subplan = appendplans[i];
684 685
					JunkFilter *j;

686
					j = ExecInitJunkFilter(subplan->plan->targetlist,
B
Bruce Momjian 已提交
687 688
							resultRelInfo->ri_RelationDesc->rd_att->tdhasoid,
								  ExecAllocTableSlot(estate->es_tupleTable));
689 690 691
					resultRelInfo->ri_junkFilter = j;
					resultRelInfo++;
				}
B
Bruce Momjian 已提交
692

693
				/*
B
Bruce Momjian 已提交
694 695
				 * Set active junkfilter too; at this point ExecInitAppend has
				 * already selected an active result relation...
696 697 698 699 700 701 702
				 */
				estate->es_junkFilter =
					estate->es_result_relation_info->ri_junkFilter;
			}
			else
			{
				/* Normal case with just one JunkFilter */
703
				JunkFilter *j;
704

705
				j = ExecInitJunkFilter(planstate->plan->targetlist,
706
									   tupType->tdhasoid,
B
Bruce Momjian 已提交
707
								  ExecAllocTableSlot(estate->es_tupleTable));
708 709 710
				estate->es_junkFilter = j;
				if (estate->es_result_relation_info)
					estate->es_result_relation_info->ri_junkFilter = j;
711

712 713 714 715
				/* For SELECT, want to return the cleaned tuple type */
				if (operation == CMD_SELECT)
					tupType = j->jf_cleanTupType;
			}
716 717 718 719
		}
		else
			estate->es_junkFilter = NULL;
	}
720

B
Bruce Momjian 已提交
721
	/*
722
	 * If doing SELECT INTO, initialize the "into" relation.  We must wait
B
Bruce Momjian 已提交
723 724
	 * till now so we have the "clean" result tuple type to create the new
	 * table from.
725 726
	 *
	 * If EXPLAIN, skip creating the "into" relation.
727
	 */
728
	intoRelationDesc = NULL;
729

730
	if (do_select_into && !explainOnly)
731
	{
732 733
		char	   *intoName;
		Oid			namespaceId;
734
		Oid			tablespaceId;
735 736 737
		AclResult	aclresult;
		Oid			intoRelationId;
		TupleDesc	tupdesc;
738

739 740 741 742 743 744 745 746
		/*
		 * Check consistency of arguments
		 */
		if (parseTree->intoOnCommit != ONCOMMIT_NOOP && !parseTree->into->istemp)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
					 errmsg("ON COMMIT can only be used on temporary tables")));

747 748 749 750 751
		/*
		 * find namespace to create in, check permissions
		 */
		intoName = parseTree->into->relname;
		namespaceId = RangeVarGetCreationNamespace(parseTree->into);
752

753 754 755
		aclresult = pg_namespace_aclcheck(namespaceId, GetUserId(),
										  ACL_CREATE);
		if (aclresult != ACLCHECK_OK)
756 757
			aclcheck_error(aclresult, ACL_KIND_NAMESPACE,
						   get_namespace_name(namespaceId));
758

759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789
		/*
		 * Select tablespace to use.  If not specified, use default_tablespace
		 * (which may in turn default to database's default).
		 */
		if (parseTree->intoTableSpaceName)
		{
			tablespaceId = get_tablespace_oid(parseTree->intoTableSpaceName);
			if (!OidIsValid(tablespaceId))
				ereport(ERROR,
						(errcode(ERRCODE_UNDEFINED_OBJECT),
						 errmsg("tablespace \"%s\" does not exist",
								parseTree->intoTableSpaceName)));
		} else
		{
			tablespaceId = GetDefaultTablespace();
			/* note InvalidOid is OK in this case */
		}

		/* Check permissions except when using the database's default */
		if (OidIsValid(tablespaceId))
		{
			AclResult	aclresult;

			aclresult = pg_tablespace_aclcheck(tablespaceId, GetUserId(),
											   ACL_CREATE);

			if (aclresult != ACLCHECK_OK)
				aclcheck_error(aclresult, ACL_KIND_TABLESPACE,
							   get_tablespace_name(tablespaceId));
		}

790 791 792 793
		/*
		 * have to copy tupType to get rid of constraints
		 */
		tupdesc = CreateTupleDescCopy(tupType);
794

795 796
		intoRelationId = heap_create_with_catalog(intoName,
												  namespaceId,
797
												  tablespaceId,
798
												  InvalidOid,
799
												  GetUserId(),
800 801 802
												  tupdesc,
												  RELKIND_RELATION,
												  false,
803 804
												  true,
												  0,
805
												  parseTree->intoOnCommit,
806
												  allowSystemTableMods);
807

808
		FreeTupleDesc(tupdesc);
809

810
		/*
B
Bruce Momjian 已提交
811 812
		 * Advance command counter so that the newly-created relation's
		 * catalog tuples will be visible to heap_open.
813 814
		 */
		CommandCounterIncrement();
815

816
		/*
B
Bruce Momjian 已提交
817 818 819
		 * If necessary, create a TOAST table for the into relation. Note that
		 * AlterTableCreateToastTable ends with CommandCounterIncrement(), so
		 * that the TOAST table will be visible for insertion.
820 821
		 */
		AlterTableCreateToastTable(intoRelationId, true);
822

823 824 825 826
		/*
		 * And open the constructed table for writing.
		 */
		intoRelationDesc = heap_open(intoRelationId, AccessExclusiveLock);
827 828 829 830 831 832 833

		/* use_wal off requires rd_targblock be initially invalid */
		Assert(intoRelationDesc->rd_targblock == InvalidBlockNumber);

		/*
		 * We can skip WAL-logging the insertions, unless PITR is in use.
		 *
834 835 836 837 838
		 * Note that for a non-temp INTO table, this is safe only because we
		 * know that the catalog changes above will have been WAL-logged, and
		 * so RecordTransactionCommit will think it needs to WAL-log the
		 * eventual transaction commit.  Else the commit might be lost, even
		 * though all the data is safely fsync'd ...
839 840
		 */
		estate->es_into_relation_use_wal = XLogArchivingActive();
841 842 843 844
	}

	estate->es_into_relation_descriptor = intoRelationDesc;

845 846
	queryDesc->tupDesc = tupType;
	queryDesc->planstate = planstate;
847 848
}

849 850 851 852 853 854 855
/*
 * Initialize ResultRelInfo data for one result relation
 */
static void
initResultRelInfo(ResultRelInfo *resultRelInfo,
				  Index resultRelationIndex,
				  List *rangeTable,
856 857
				  CmdType operation,
				  bool doInstrument)
858 859 860 861 862 863 864 865 866 867
{
	Oid			resultRelationOid;
	Relation	resultRelationDesc;

	resultRelationOid = getrelid(resultRelationIndex, rangeTable);
	resultRelationDesc = heap_open(resultRelationOid, RowExclusiveLock);

	switch (resultRelationDesc->rd_rel->relkind)
	{
		case RELKIND_SEQUENCE:
868 869
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
870
					 errmsg("cannot change sequence \"%s\"",
B
Bruce Momjian 已提交
871
							RelationGetRelationName(resultRelationDesc))));
872 873
			break;
		case RELKIND_TOASTVALUE:
874 875
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
876
					 errmsg("cannot change TOAST relation \"%s\"",
B
Bruce Momjian 已提交
877
							RelationGetRelationName(resultRelationDesc))));
878 879
			break;
		case RELKIND_VIEW:
880 881
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
882
					 errmsg("cannot change view \"%s\"",
B
Bruce Momjian 已提交
883
							RelationGetRelationName(resultRelationDesc))));
884 885 886 887 888 889 890 891 892 893
			break;
	}

	MemSet(resultRelInfo, 0, sizeof(ResultRelInfo));
	resultRelInfo->type = T_ResultRelInfo;
	resultRelInfo->ri_RangeTableIndex = resultRelationIndex;
	resultRelInfo->ri_RelationDesc = resultRelationDesc;
	resultRelInfo->ri_NumIndices = 0;
	resultRelInfo->ri_IndexRelationDescs = NULL;
	resultRelInfo->ri_IndexRelationInfo = NULL;
894 895
	/* make a copy so as not to depend on relcache info not changing... */
	resultRelInfo->ri_TrigDesc = CopyTriggerDesc(resultRelationDesc->trigdesc);
896 897
	if (resultRelInfo->ri_TrigDesc)
	{
B
Bruce Momjian 已提交
898
		int			n = resultRelInfo->ri_TrigDesc->numtriggers;
899 900 901 902 903 904 905 906 907 908 909 910 911

		resultRelInfo->ri_TrigFunctions = (FmgrInfo *)
			palloc0(n * sizeof(FmgrInfo));
		if (doInstrument)
			resultRelInfo->ri_TrigInstrument = InstrAlloc(n);
		else
			resultRelInfo->ri_TrigInstrument = NULL;
	}
	else
	{
		resultRelInfo->ri_TrigFunctions = NULL;
		resultRelInfo->ri_TrigInstrument = NULL;
	}
912 913 914 915 916
	resultRelInfo->ri_ConstraintExprs = NULL;
	resultRelInfo->ri_junkFilter = NULL;

	/*
	 * If there are indices on the result relation, open them and save
B
Bruce Momjian 已提交
917 918 919
	 * descriptors in the result relation info, so that we can add new index
	 * entries for the tuples we add/update.  We need not do this for a
	 * DELETE, however, since deletion doesn't affect indexes.
920 921 922 923 924 925
	 */
	if (resultRelationDesc->rd_rel->relhasindex &&
		operation != CMD_DELETE)
		ExecOpenIndices(resultRelInfo);
}

926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982
/*
 *		ExecContextForcesOids
 *
 * This is pretty grotty: when doing INSERT, UPDATE, or SELECT INTO,
 * we need to ensure that result tuples have space for an OID iff they are
 * going to be stored into a relation that has OIDs.  In other contexts
 * we are free to choose whether to leave space for OIDs in result tuples
 * (we generally don't want to, but we do if a physical-tlist optimization
 * is possible).  This routine checks the plan context and returns TRUE if the
 * choice is forced, FALSE if the choice is not forced.  In the TRUE case,
 * *hasoids is set to the required value.
 *
 * One reason this is ugly is that all plan nodes in the plan tree will emit
 * tuples with space for an OID, though we really only need the topmost node
 * to do so.  However, node types like Sort don't project new tuples but just
 * return their inputs, and in those cases the requirement propagates down
 * to the input node.  Eventually we might make this code smart enough to
 * recognize how far down the requirement really goes, but for now we just
 * make all plan nodes do the same thing if the top level forces the choice.
 *
 * We assume that estate->es_result_relation_info is already set up to
 * describe the target relation.  Note that in an UPDATE that spans an
 * inheritance tree, some of the target relations may have OIDs and some not.
 * We have to make the decisions on a per-relation basis as we initialize
 * each of the child plans of the topmost Append plan.
 *
 * SELECT INTO is even uglier, because we don't have the INTO relation's
 * descriptor available when this code runs; we have to look aside at a
 * flag set by InitPlan().
 */
bool
ExecContextForcesOids(PlanState *planstate, bool *hasoids)
{
	if (planstate->state->es_select_into)
	{
		*hasoids = planstate->state->es_into_oids;
		return true;
	}
	else
	{
		ResultRelInfo *ri = planstate->state->es_result_relation_info;

		if (ri != NULL)
		{
			Relation	rel = ri->ri_RelationDesc;

			if (rel != NULL)
			{
				*hasoids = rel->rd_rel->relhasoids;
				return true;
			}
		}
	}

	return false;
}

983
/* ----------------------------------------------------------------
984
 *		ExecEndPlan
985
 *
986
 *		Cleans up the query plan -- closes files and frees up storage
987 988 989 990 991 992
 *
 * NOTE: we are no longer very worried about freeing storage per se
 * in this code; FreeExecutorState should be guaranteed to release all
 * memory that needs to be released.  What we are worried about doing
 * is closing relations and dropping buffer pins.  Thus, for example,
 * tuple tables must be cleared or dropped to ensure pins are released.
993 994
 * ----------------------------------------------------------------
 */
995
void
996
ExecEndPlan(PlanState *planstate, EState *estate)
997
{
998 999
	ResultRelInfo *resultRelInfo;
	int			i;
1000
	ListCell   *l;
1001

1002 1003 1004 1005 1006 1007
	/*
	 * shut down any PlanQual processing we were doing
	 */
	if (estate->es_evalPlanQual != NULL)
		EndEvalPlanQual(estate);

B
Bruce Momjian 已提交
1008
	/*
1009
	 * shut down the node-type-specific query processing
1010
	 */
1011
	ExecEndNode(planstate);
1012

B
Bruce Momjian 已提交
1013
	/*
B
Bruce Momjian 已提交
1014
	 * destroy the executor "tuple" table.
1015
	 */
1016 1017
	ExecDropTupleTable(estate->es_tupleTable, true);
	estate->es_tupleTable = NULL;
1018

B
Bruce Momjian 已提交
1019
	/*
B
Bruce Momjian 已提交
1020
	 * close the result relation(s) if any, but hold locks until xact commit.
1021
	 */
1022 1023
	resultRelInfo = estate->es_result_relations;
	for (i = estate->es_num_result_relations; i > 0; i--)
1024
	{
1025 1026 1027 1028
		/* Close indices and then the relation itself */
		ExecCloseIndices(resultRelInfo);
		heap_close(resultRelInfo->ri_RelationDesc, NoLock);
		resultRelInfo++;
1029 1030
	}

B
Bruce Momjian 已提交
1031
	/*
1032
	 * close the "into" relation if necessary, again keeping lock
1033
	 */
1034
	if (estate->es_into_relation_descriptor != NULL)
1035 1036
	{
		/*
B
Bruce Momjian 已提交
1037 1038 1039 1040
		 * If we skipped using WAL, and it's not a temp relation, we must
		 * force the relation down to disk before it's safe to commit the
		 * transaction.  This requires forcing out any dirty buffers and then
		 * doing a forced fsync.
1041 1042 1043 1044 1045
		 */
		if (!estate->es_into_relation_use_wal &&
			!estate->es_into_relation_descriptor->rd_istemp)
		{
			FlushRelationBuffers(estate->es_into_relation_descriptor);
1046
			/* FlushRelationBuffers will have opened rd_smgr */
1047 1048 1049
			smgrimmedsync(estate->es_into_relation_descriptor->rd_smgr);
		}

1050
		heap_close(estate->es_into_relation_descriptor, NoLock);
1051
	}
1052 1053

	/*
1054
	 * close any relations selected FOR UPDATE/FOR SHARE, again keeping locks
1055
	 */
1056
	foreach(l, estate->es_rowMarks)
1057
	{
1058
		ExecRowMark *erm = lfirst(l);
1059 1060 1061

		heap_close(erm->relation, NoLock);
	}
1062 1063 1064
}

/* ----------------------------------------------------------------
1065 1066
 *		ExecutePlan
 *
1067
 *		processes the query plan to retrieve 'numberTuples' tuples in the
1068
 *		direction specified.
1069
 *
1070
 *		Retrieves all tuples if numberTuples is 0
1071
 *
1072
 *		result is either a slot containing the last tuple in the case
1073
 *		of a SELECT or NULL otherwise.
1074
 *
1075 1076
 * Note: the ctid attribute is a 'junk' attribute that is removed before the
 * user can see it
1077 1078 1079
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
1080
ExecutePlan(EState *estate,
1081
			PlanState *planstate,
1082
			CmdType operation,
1083
			long numberTuples,
1084
			ScanDirection direction,
1085
			DestReceiver *dest)
1086
{
B
Bruce Momjian 已提交
1087 1088 1089 1090 1091 1092
	JunkFilter *junkfilter;
	TupleTableSlot *slot;
	ItemPointer tupleid = NULL;
	ItemPointerData tuple_ctid;
	long		current_tuple_count;
	TupleTableSlot *result;
1093

B
Bruce Momjian 已提交
1094
	/*
B
Bruce Momjian 已提交
1095
	 * initialize local variables
1096
	 */
1097 1098 1099 1100
	slot = NULL;
	current_tuple_count = 0;
	result = NULL;

B
Bruce Momjian 已提交
1101 1102
	/*
	 * Set the direction.
1103
	 */
1104 1105
	estate->es_direction = direction;

1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121
	/*
	 * Process BEFORE EACH STATEMENT triggers
	 */
	switch (operation)
	{
		case CMD_UPDATE:
			ExecBSUpdateTriggers(estate, estate->es_result_relation_info);
			break;
		case CMD_DELETE:
			ExecBSDeleteTriggers(estate, estate->es_result_relation_info);
			break;
		case CMD_INSERT:
			ExecBSInsertTriggers(estate, estate->es_result_relation_info);
			break;
		default:
			/* do nothing */
1122
			break;
1123 1124
	}

B
Bruce Momjian 已提交
1125
	/*
B
Bruce Momjian 已提交
1126
	 * Loop until we've processed the proper number of tuples from the plan.
1127 1128 1129 1130
	 */

	for (;;)
	{
1131 1132
		/* Reset the per-output-tuple exprcontext */
		ResetPerTupleExprContext(estate);
B
Bruce Momjian 已提交
1133

B
Bruce Momjian 已提交
1134
		/*
B
Bruce Momjian 已提交
1135
		 * Execute the plan and obtain a tuple
1136
		 */
B
Bruce Momjian 已提交
1137
lnext:	;
1138 1139 1140 1141
		if (estate->es_useEvalPlan)
		{
			slot = EvalPlanQualNext(estate);
			if (TupIsNull(slot))
1142
				slot = ExecProcNode(planstate);
1143 1144
		}
		else
1145
			slot = ExecProcNode(planstate);
1146

B
Bruce Momjian 已提交
1147
		/*
B
Bruce Momjian 已提交
1148 1149
		 * if the tuple is null, then we assume there is nothing more to
		 * process so we just return null...
1150 1151 1152 1153 1154
		 */
		if (TupIsNull(slot))
		{
			result = NULL;
			break;
1155 1156
		}

B
Bruce Momjian 已提交
1157
		/*
B
Bruce Momjian 已提交
1158 1159
		 * if we have a junk filter, then project a new tuple with the junk
		 * removed.
1160
		 *
1161
		 * Store this new "clean" tuple in the junkfilter's resultSlot.
B
Bruce Momjian 已提交
1162 1163
		 * (Formerly, we stored it back over the "dirty" tuple, which is WRONG
		 * because that tuple slot has the wrong descriptor.)
1164
		 *
B
Bruce Momjian 已提交
1165
		 * Also, extract all the junk information we need.
1166
		 */
1167
		if ((junkfilter = estate->es_junkFilter) != NULL)
1168
		{
1169 1170
			Datum		datum;
			bool		isNull;
1171

B
Bruce Momjian 已提交
1172
			/*
1173 1174 1175 1176 1177 1178 1179 1180 1181
			 * extract the 'ctid' junk attribute.
			 */
			if (operation == CMD_UPDATE || operation == CMD_DELETE)
			{
				if (!ExecGetJunkAttribute(junkfilter,
										  slot,
										  "ctid",
										  &datum,
										  &isNull))
1182
					elog(ERROR, "could not find junk ctid column");
1183

1184
				/* shouldn't ever get a null result... */
1185
				if (isNull)
1186
					elog(ERROR, "ctid is NULL");
1187 1188

				tupleid = (ItemPointer) DatumGetPointer(datum);
B
Bruce Momjian 已提交
1189
				tuple_ctid = *tupleid;	/* make sure we don't free the ctid!! */
1190 1191
				tupleid = &tuple_ctid;
			}
B
Bruce Momjian 已提交
1192

1193 1194 1195
			/*
			 * Process any FOR UPDATE or FOR SHARE locking requested.
			 */
1196
			else if (estate->es_rowMarks != NIL)
1197
			{
1198
				ListCell   *l;
1199

B
Bruce Momjian 已提交
1200
		lmark:	;
1201
				foreach(l, estate->es_rowMarks)
1202
				{
1203
					ExecRowMark *erm = lfirst(l);
1204
					HeapTupleData tuple;
1205 1206 1207
					Buffer		buffer;
					ItemPointerData update_ctid;
					TransactionId update_xmax;
1208
					TupleTableSlot *newSlot;
B
Bruce Momjian 已提交
1209 1210
					LockTupleMode lockmode;
					HTSU_Result test;
1211

1212 1213 1214 1215 1216
					if (!ExecGetJunkAttribute(junkfilter,
											  slot,
											  erm->resname,
											  &datum,
											  &isNull))
1217
						elog(ERROR, "could not find junk \"%s\" column",
1218
							 erm->resname);
1219

1220
					/* shouldn't ever get a null result... */
1221
					if (isNull)
1222
						elog(ERROR, "\"%s\" is NULL", erm->resname);
1223

1224 1225
					tuple.t_self = *((ItemPointer) DatumGetPointer(datum));

1226 1227 1228 1229 1230 1231
					if (estate->es_forUpdate)
						lockmode = LockTupleExclusive;
					else
						lockmode = LockTupleShared;

					test = heap_lock_tuple(erm->relation, &tuple, &buffer,
1232 1233 1234
										   &update_ctid, &update_xmax,
										   estate->es_snapshot->curcid,
										   lockmode, estate->es_rowNoWait);
1235 1236 1237 1238
					ReleaseBuffer(buffer);
					switch (test)
					{
						case HeapTupleSelfUpdated:
1239 1240 1241
							/* treat it as deleted; do not process */
							goto lnext;

1242 1243 1244 1245
						case HeapTupleMayBeUpdated:
							break;

						case HeapTupleUpdated:
1246
							if (IsXactIsoLevelSerializable)
1247
								ereport(ERROR,
B
Bruce Momjian 已提交
1248 1249
								 (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
								  errmsg("could not serialize access due to concurrent update")));
1250 1251
							if (!ItemPointerEquals(&update_ctid,
												   &tuple.t_self))
1252
							{
1253 1254 1255 1256
								/* updated, so look at updated version */
								newSlot = EvalPlanQual(estate,
													   erm->rti,
													   &update_ctid,
1257 1258
													   update_xmax,
													   estate->es_snapshot->curcid);
1259
								if (!TupIsNull(newSlot))
1260 1261 1262 1263 1264 1265
								{
									slot = newSlot;
									estate->es_useEvalPlan = true;
									goto lmark;
								}
							}
B
Bruce Momjian 已提交
1266 1267 1268

							/*
							 * if tuple was deleted or PlanQual failed for
B
Bruce Momjian 已提交
1269
							 * updated tuple - we must not return this tuple!
1270 1271
							 */
							goto lnext;
1272 1273

						default:
1274
							elog(ERROR, "unrecognized heap_lock_tuple status: %u",
1275
								 test);
1276
							return NULL;
1277 1278 1279
					}
				}
			}
1280

B
Bruce Momjian 已提交
1281
			/*
1282 1283 1284
			 * Finally create a new "clean" tuple with all junk attributes
			 * removed
			 */
1285
			slot = ExecFilterJunk(junkfilter, slot);
1286
		}
1287

B
Bruce Momjian 已提交
1288
		/*
B
Bruce Momjian 已提交
1289 1290 1291
		 * now that we have a tuple, do the appropriate thing with it.. either
		 * return it to the user, add it to a relation someplace, delete it
		 * from a relation, or modify some of its attributes.
1292 1293 1294
		 */
		switch (operation)
		{
1295
			case CMD_SELECT:
B
Bruce Momjian 已提交
1296
				ExecSelect(slot,	/* slot containing tuple */
1297
						   dest,	/* destination's tuple-receiver obj */
1298
						   estate);
1299 1300
				result = slot;
				break;
1301

1302
			case CMD_INSERT:
1303
				ExecInsert(slot, tupleid, estate);
1304 1305
				result = NULL;
				break;
1306

1307 1308 1309 1310
			case CMD_DELETE:
				ExecDelete(slot, tupleid, estate);
				result = NULL;
				break;
1311

1312
			case CMD_UPDATE:
1313
				ExecUpdate(slot, tupleid, estate);
1314 1315
				result = NULL;
				break;
1316

1317
			default:
1318 1319
				elog(ERROR, "unrecognized operation code: %d",
					 (int) operation);
1320
				result = NULL;
1321
				break;
1322
		}
B
Bruce Momjian 已提交
1323

B
Bruce Momjian 已提交
1324
		/*
B
Bruce Momjian 已提交
1325 1326 1327
		 * check our tuple count.. if we've processed the proper number then
		 * quit, else loop again and process more tuples.  Zero numberTuples
		 * means no limit.
1328
		 */
1329
		current_tuple_count++;
1330
		if (numberTuples && numberTuples == current_tuple_count)
1331
			break;
1332
	}
1333

1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349
	/*
	 * Process AFTER EACH STATEMENT triggers
	 */
	switch (operation)
	{
		case CMD_UPDATE:
			ExecASUpdateTriggers(estate, estate->es_result_relation_info);
			break;
		case CMD_DELETE:
			ExecASDeleteTriggers(estate, estate->es_result_relation_info);
			break;
		case CMD_INSERT:
			ExecASInsertTriggers(estate, estate->es_result_relation_info);
			break;
		default:
			/* do nothing */
1350
			break;
1351 1352
	}

B
Bruce Momjian 已提交
1353
	/*
B
Bruce Momjian 已提交
1354
	 * here, result is either a slot containing a tuple in the case of a
1355
	 * SELECT or NULL otherwise.
1356
	 */
1357
	return result;
1358 1359 1360
}

/* ----------------------------------------------------------------
1361
 *		ExecSelect
1362
 *
1363
 *		SELECTs are easy.. we just pass the tuple to the appropriate
1364
 *		print function.  The only complexity is when we do a
1365
 *		"SELECT INTO", in which case we insert the tuple into
1366 1367
 *		the appropriate relation (note: this is a newly created relation
 *		so we don't need to worry about indices or locks.)
1368 1369 1370
 * ----------------------------------------------------------------
 */
static void
1371
ExecSelect(TupleTableSlot *slot,
1372
		   DestReceiver *dest,
1373
		   EState *estate)
1374
{
B
Bruce Momjian 已提交
1375
	/*
B
Bruce Momjian 已提交
1376
	 * insert the tuple into the "into relation"
1377 1378
	 *
	 * XXX this probably ought to be replaced by a separate destination
1379 1380 1381
	 */
	if (estate->es_into_relation_descriptor != NULL)
	{
1382 1383 1384
		HeapTuple	tuple;

		tuple = ExecCopySlotTuple(slot);
1385
		heap_insert(estate->es_into_relation_descriptor, tuple,
1386 1387 1388
					estate->es_snapshot->curcid,
					estate->es_into_relation_use_wal,
					false);		/* never any point in using FSM */
1389 1390
		/* we know there are no indexes to update */
		heap_freetuple(tuple);
1391 1392 1393
		IncrAppended();
	}

B
Bruce Momjian 已提交
1394
	/*
1395
	 * send the tuple to the destination
1396
	 */
1397
	(*dest->receiveSlot) (slot, dest);
1398 1399
	IncrRetrieved();
	(estate->es_processed)++;
1400 1401 1402
}

/* ----------------------------------------------------------------
1403
 *		ExecInsert
1404
 *
1405
 *		INSERTs are trickier.. we have to insert the tuple into
1406 1407
 *		the base relation and insert appropriate tuples into the
 *		index relations.
1408 1409 1410
 * ----------------------------------------------------------------
 */
static void
1411
ExecInsert(TupleTableSlot *slot,
1412
		   ItemPointer tupleid,
1413
		   EState *estate)
1414
{
1415
	HeapTuple	tuple;
1416
	ResultRelInfo *resultRelInfo;
1417 1418
	Relation	resultRelationDesc;
	Oid			newId;
1419

B
Bruce Momjian 已提交
1420
	/*
B
Bruce Momjian 已提交
1421 1422
	 * get the heap tuple out of the tuple table slot, making sure we have a
	 * writable copy
1423
	 */
1424
	tuple = ExecMaterializeSlot(slot);
1425

B
Bruce Momjian 已提交
1426
	/*
1427
	 * get information on the (current) result relation
1428
	 */
1429 1430
	resultRelInfo = estate->es_result_relation_info;
	resultRelationDesc = resultRelInfo->ri_RelationDesc;
1431 1432

	/* BEFORE ROW INSERT Triggers */
1433
	if (resultRelInfo->ri_TrigDesc &&
B
Bruce Momjian 已提交
1434
		resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_INSERT] > 0)
1435
	{
1436
		HeapTuple	newtuple;
1437

1438
		newtuple = ExecBRInsertTriggers(estate, resultRelInfo, tuple);
1439 1440 1441 1442 1443 1444

		if (newtuple == NULL)	/* "do nothing" */
			return;

		if (newtuple != tuple)	/* modified by Trigger(s) */
		{
1445
			/*
1446 1447
			 * Put the modified tuple into a slot for convenience of routines
			 * below.  We assume the tuple was allocated in per-tuple memory
B
Bruce Momjian 已提交
1448 1449
			 * context, and therefore will go away by itself. The tuple table
			 * slot should not try to clear it.
1450
			 */
1451 1452 1453 1454 1455 1456 1457 1458
			TupleTableSlot *newslot = estate->es_trig_tuple_slot;

			if (newslot->tts_tupleDescriptor != slot->tts_tupleDescriptor)
				ExecSetSlotDescriptor(newslot,
									  slot->tts_tupleDescriptor,
									  false);
			ExecStoreTuple(newtuple, newslot, InvalidBuffer, false);
			slot = newslot;
1459
			tuple = newtuple;
1460 1461 1462
		}
	}

B
Bruce Momjian 已提交
1463
	/*
1464
	 * Check the constraints of the tuple
1465 1466
	 */
	if (resultRelationDesc->rd_att->constr)
1467
		ExecConstraints(resultRelInfo, slot, estate);
1468

B
Bruce Momjian 已提交
1469
	/*
B
Bruce Momjian 已提交
1470
	 * insert the tuple
1471
	 *
B
Bruce Momjian 已提交
1472 1473
	 * Note: heap_insert returns the tid (location) of the new tuple in the
	 * t_self field.
1474
	 */
1475
	newId = heap_insert(resultRelationDesc, tuple,
1476 1477
						estate->es_snapshot->curcid,
						true, true);
1478

1479
	IncrAppended();
1480 1481
	(estate->es_processed)++;
	estate->es_lastoid = newId;
T
Tom Lane 已提交
1482
	setLastTid(&(tuple->t_self));
1483

B
Bruce Momjian 已提交
1484
	/*
1485
	 * insert index entries for tuple
1486
	 */
1487
	if (resultRelInfo->ri_NumIndices > 0)
1488
		ExecInsertIndexTuples(slot, &(tuple->t_self), estate, false);
1489 1490

	/* AFTER ROW INSERT Triggers */
1491
	ExecARInsertTriggers(estate, resultRelInfo, tuple);
1492 1493 1494
}

/* ----------------------------------------------------------------
1495
 *		ExecDelete
1496
 *
1497 1498
 *		DELETE is like UPDATE, except that we delete the tuple and no
 *		index modifications are needed
1499 1500 1501
 * ----------------------------------------------------------------
 */
static void
1502
ExecDelete(TupleTableSlot *slot,
1503
		   ItemPointer tupleid,
1504
		   EState *estate)
1505
{
1506
	ResultRelInfo *resultRelInfo;
B
Bruce Momjian 已提交
1507
	Relation	resultRelationDesc;
B
Bruce Momjian 已提交
1508
	HTSU_Result result;
1509 1510
	ItemPointerData update_ctid;
	TransactionId update_xmax;
1511

B
Bruce Momjian 已提交
1512
	/*
1513
	 * get information on the (current) result relation
1514
	 */
1515 1516
	resultRelInfo = estate->es_result_relation_info;
	resultRelationDesc = resultRelInfo->ri_RelationDesc;
1517 1518

	/* BEFORE ROW DELETE Triggers */
1519
	if (resultRelInfo->ri_TrigDesc &&
B
Bruce Momjian 已提交
1520
		resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_DELETE] > 0)
1521
	{
1522
		bool		dodelete;
1523

1524
		dodelete = ExecBRDeleteTriggers(estate, resultRelInfo, tupleid,
1525
										estate->es_snapshot->curcid);
1526 1527 1528 1529 1530

		if (!dodelete)			/* "do nothing" */
			return;
	}

V
Vadim B. Mikheev 已提交
1531
	/*
B
Bruce Momjian 已提交
1532
	 * delete the tuple
1533
	 *
1534 1535
	 * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check that
	 * the row to be deleted is visible to that snapshot, and throw a can't-
B
Bruce Momjian 已提交
1536
	 * serialize error if not.	This is a special-case behavior needed for
1537
	 * referential integrity updates in serializable transactions.
1538
	 */
1539
ldelete:;
1540
	result = heap_delete(resultRelationDesc, tupleid,
1541
						 &update_ctid, &update_xmax,
1542 1543
						 estate->es_snapshot->curcid,
						 estate->es_crosscheck_snapshot,
B
Bruce Momjian 已提交
1544
						 true /* wait for commit */ );
V
Vadim B. Mikheev 已提交
1545 1546 1547
	switch (result)
	{
		case HeapTupleSelfUpdated:
1548
			/* already deleted by self; nothing to do */
V
Vadim B. Mikheev 已提交
1549 1550 1551 1552 1553 1554
			return;

		case HeapTupleMayBeUpdated:
			break;

		case HeapTupleUpdated:
1555
			if (IsXactIsoLevelSerializable)
1556 1557
				ereport(ERROR,
						(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
1558
						 errmsg("could not serialize access due to concurrent update")));
1559
			else if (!ItemPointerEquals(tupleid, &update_ctid))
1560
			{
1561
				TupleTableSlot *epqslot;
1562

1563 1564 1565
				epqslot = EvalPlanQual(estate,
									   resultRelInfo->ri_RangeTableIndex,
									   &update_ctid,
1566 1567
									   update_xmax,
									   estate->es_snapshot->curcid);
V
Vadim B. Mikheev 已提交
1568
				if (!TupIsNull(epqslot))
1569
				{
1570
					*tupleid = update_ctid;
1571 1572 1573
					goto ldelete;
				}
			}
1574
			/* tuple already deleted; nothing to do */
V
Vadim B. Mikheev 已提交
1575 1576 1577
			return;

		default:
1578
			elog(ERROR, "unrecognized heap_delete status: %u", result);
V
Vadim B. Mikheev 已提交
1579 1580
			return;
	}
1581 1582 1583 1584

	IncrDeleted();
	(estate->es_processed)++;

B
Bruce Momjian 已提交
1585
	/*
B
Bruce Momjian 已提交
1586
	 * Note: Normally one would think that we have to delete index tuples
1587
	 * associated with the heap tuple now...
1588
	 *
1589 1590 1591
	 * ... but in POSTGRES, we have no need to do this because VACUUM will
	 * take care of it later.  We can't delete index tuples immediately
	 * anyway, since the tuple is still visible to other transactions.
1592 1593 1594
	 */

	/* AFTER ROW DELETE Triggers */
1595
	ExecARDeleteTriggers(estate, resultRelInfo, tupleid);
1596 1597 1598
}

/* ----------------------------------------------------------------
1599
 *		ExecUpdate
1600
 *
1601 1602 1603 1604
 *		note: we can't run UPDATE queries with transactions
 *		off because UPDATEs are actually INSERTs and our
 *		scan will mistakenly loop forever, updating the tuple
 *		it just inserted..	This should be fixed but until it
1605 1606
 *		is, we don't want to get stuck in an infinite loop
 *		which corrupts your database..
1607 1608 1609
 * ----------------------------------------------------------------
 */
static void
1610
ExecUpdate(TupleTableSlot *slot,
B
Bruce Momjian 已提交
1611 1612
		   ItemPointer tupleid,
		   EState *estate)
1613
{
B
Bruce Momjian 已提交
1614
	HeapTuple	tuple;
1615
	ResultRelInfo *resultRelInfo;
B
Bruce Momjian 已提交
1616
	Relation	resultRelationDesc;
B
Bruce Momjian 已提交
1617
	HTSU_Result result;
1618 1619
	ItemPointerData update_ctid;
	TransactionId update_xmax;
1620

B
Bruce Momjian 已提交
1621
	/*
B
Bruce Momjian 已提交
1622
	 * abort the operation if not running transactions
1623 1624
	 */
	if (IsBootstrapProcessingMode())
1625
		elog(ERROR, "cannot UPDATE during bootstrap");
1626

B
Bruce Momjian 已提交
1627
	/*
B
Bruce Momjian 已提交
1628 1629
	 * get the heap tuple out of the tuple table slot, making sure we have a
	 * writable copy
1630
	 */
1631
	tuple = ExecMaterializeSlot(slot);
1632

B
Bruce Momjian 已提交
1633
	/*
1634
	 * get information on the (current) result relation
1635
	 */
1636 1637
	resultRelInfo = estate->es_result_relation_info;
	resultRelationDesc = resultRelInfo->ri_RelationDesc;
1638 1639

	/* BEFORE ROW UPDATE Triggers */
1640
	if (resultRelInfo->ri_TrigDesc &&
B
Bruce Momjian 已提交
1641
		resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_UPDATE] > 0)
1642
	{
1643
		HeapTuple	newtuple;
1644

1645
		newtuple = ExecBRUpdateTriggers(estate, resultRelInfo,
1646
										tupleid, tuple,
1647
										estate->es_snapshot->curcid);
1648 1649 1650 1651 1652 1653

		if (newtuple == NULL)	/* "do nothing" */
			return;

		if (newtuple != tuple)	/* modified by Trigger(s) */
		{
1654
			/*
1655 1656
			 * Put the modified tuple into a slot for convenience of routines
			 * below.  We assume the tuple was allocated in per-tuple memory
B
Bruce Momjian 已提交
1657 1658
			 * context, and therefore will go away by itself. The tuple table
			 * slot should not try to clear it.
1659
			 */
1660 1661 1662 1663 1664 1665 1666 1667
			TupleTableSlot *newslot = estate->es_trig_tuple_slot;

			if (newslot->tts_tupleDescriptor != slot->tts_tupleDescriptor)
				ExecSetSlotDescriptor(newslot,
									  slot->tts_tupleDescriptor,
									  false);
			ExecStoreTuple(newtuple, newslot, InvalidBuffer, false);
			slot = newslot;
1668
			tuple = newtuple;
1669 1670 1671
		}
	}

B
Bruce Momjian 已提交
1672
	/*
1673
	 * Check the constraints of the tuple
1674
	 *
1675 1676
	 * If we generate a new candidate tuple after EvalPlanQual testing, we
	 * must loop back here and recheck constraints.  (We don't need to redo
B
Bruce Momjian 已提交
1677 1678 1679
	 * triggers, however.  If there are any BEFORE triggers then trigger.c
	 * will have done heap_lock_tuple to lock the correct tuple, so there's no
	 * need to do them again.)
1680
	 */
1681
lreplace:;
1682
	if (resultRelationDesc->rd_att->constr)
1683
		ExecConstraints(resultRelInfo, slot, estate);
1684

	/*
	 * replace the heap tuple
	 *
	 * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check that
	 * the row to be updated is visible to that snapshot, and throw a can't-
	 * serialize error if not.  This is a special-case behavior needed for
	 * referential integrity updates in serializable transactions.
	 */
	result = heap_update(resultRelationDesc, tupleid, tuple,
						 &update_ctid, &update_xmax,
						 estate->es_snapshot->curcid,
						 estate->es_crosscheck_snapshot,
						 true /* wait for commit */ );
	switch (result)
	{
		case HeapTupleSelfUpdated:
			/* already deleted by self; nothing to do */
			return;

		case HeapTupleMayBeUpdated:
			break;

		case HeapTupleUpdated:
			if (IsXactIsoLevelSerializable)
				ereport(ERROR,
						(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
						 errmsg("could not serialize access due to concurrent update")));
			else if (!ItemPointerEquals(tupleid, &update_ctid))
			{
				TupleTableSlot *epqslot;

				epqslot = EvalPlanQual(estate,
									   resultRelInfo->ri_RangeTableIndex,
									   &update_ctid,
									   update_xmax,
									   estate->es_snapshot->curcid);
				if (!TupIsNull(epqslot))
				{
					*tupleid = update_ctid;
					slot = ExecFilterJunk(estate->es_junkFilter, epqslot);
					tuple = ExecMaterializeSlot(slot);
					goto lreplace;
				}
			}
			/* tuple already deleted; nothing to do */
			return;

		default:
			elog(ERROR, "unrecognized heap_update status: %u", result);
			return;
	}

	IncrReplaced();
	(estate->es_processed)++;

	/*
	 * Note: instead of having to update the old index tuples associated with
	 * the heap tuple, all we do is form and insert new index tuples.  This is
	 * because UPDATEs are actually DELETEs and INSERTs, and index tuple
	 * deletion is done later by VACUUM (see notes in ExecDelete).  All we do
	 * here is insert new index tuples.  -cim 9/27/89
	 */

	/*
	 * insert index entries for tuple
	 *
	 * Note: heap_update returns the tid (location) of the new tuple in the
	 * t_self field.
	 */
	if (resultRelInfo->ri_NumIndices > 0)
		ExecInsertIndexTuples(slot, &(tuple->t_self), estate, false);

	/* AFTER ROW UPDATE Triggers */
	ExecARUpdateTriggers(estate, resultRelInfo, tupleid, tuple);
}
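
/*
 * Illustrative sketch only, not part of PostgreSQL: the control flow of the
 * READ COMMITTED retry above -- heap_update() reports HeapTupleUpdated, the
 * newest row version is rechecked via EvalPlanQual(), and on success we loop
 * back to "lreplace" to redo constraints and the update.  ExampleRow and
 * example_update_latest() are hypothetical names; the toy is single-threaded
 * and only mimics the shape of the loop, not real concurrency or MVCC.
 */
typedef struct ExampleRow
{
	int			version;		/* bumped by every (simulated) update */
	int			value;
	bool		qualifies;		/* does the newest version still pass our quals? */
} ExampleRow;

static bool
example_update_latest(ExampleRow *row, int expected_version, int newvalue)
{
	for (;;)
	{
		if (row->version == expected_version)
		{
			/* no concurrent update seen: recheck constraints, then apply */
			row->value = newvalue;
			row->version++;
			return true;
		}

		/* the "HeapTupleUpdated" case: someone else updated the row first */
		if (!row->qualifies)
			return false;		/* newest version fails our quals: skip row */

		/* newest version still qualifies: retry against it ("goto lreplace") */
		expected_version = row->version;
	}
}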

static const char *
ExecRelCheck(ResultRelInfo *resultRelInfo,
			 TupleTableSlot *slot, EState *estate)
{
	Relation	rel = resultRelInfo->ri_RelationDesc;
	int			ncheck = rel->rd_att->constr->num_check;
	ConstrCheck *check = rel->rd_att->constr->check;
	ExprContext *econtext;
	MemoryContext oldContext;
	List	   *qual;
	int			i;

	/*
	 * If first time through for this result relation, build expression
	 * nodetrees for rel's constraint expressions.  Keep them in the per-query
	 * memory context so they'll survive throughout the query.
	 */
	if (resultRelInfo->ri_ConstraintExprs == NULL)
	{
		oldContext = MemoryContextSwitchTo(estate->es_query_cxt);
		resultRelInfo->ri_ConstraintExprs =
			(List **) palloc(ncheck * sizeof(List *));
		for (i = 0; i < ncheck; i++)
		{
			/* ExecQual wants implicit-AND form */
			qual = make_ands_implicit(stringToNode(check[i].ccbin));
			resultRelInfo->ri_ConstraintExprs[i] = (List *)
				ExecPrepareExpr((Expr *) qual, estate);
		}
		MemoryContextSwitchTo(oldContext);
	}

	/*
	 * We will use the EState's per-tuple context for evaluating constraint
	 * expressions (creating it if it's not already there).
	 */
	econtext = GetPerTupleExprContext(estate);

	/* Arrange for econtext's scan tuple to be the tuple under test */
	econtext->ecxt_scantuple = slot;

	/* And evaluate the constraints */
	for (i = 0; i < ncheck; i++)
	{
		qual = resultRelInfo->ri_ConstraintExprs[i];

		/*
		 * NOTE: SQL92 specifies that a NULL result from a constraint
		 * expression is not to be treated as a failure.  Therefore, tell
		 * ExecQual to return TRUE for NULL.
		 */
		if (!ExecQual(qual, econtext, true))
			return check[i].ccname;
	}

	/* NULL result means no error */
	return NULL;
}
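
/*
 * Illustrative sketch only, not part of PostgreSQL: the SQL rule applied by
 * ExecRelCheck above, namely that a CHECK constraint fails only when its
 * expression evaluates to definitely-false; an unknown (NULL) result counts
 * as a pass, which is why ExecQual is told to return TRUE for NULL.  The
 * names ExampleTriState and example_first_failed_check are hypothetical.
 */
typedef enum ExampleTriState
{
	EXAMPLE_FALSE,
	EXAMPLE_TRUE,
	EXAMPLE_UNKNOWN				/* the constraint expression came out NULL */
} ExampleTriState;

/*
 * Return the index of the first failed check, or -1 if the row passes all
 * of them.  Only EXAMPLE_FALSE is a failure; EXAMPLE_UNKNOWN passes.
 */
static int
example_first_failed_check(const ExampleTriState *results, int nresults)
{
	int			i;

	for (i = 0; i < nresults; i++)
	{
		if (results[i] == EXAMPLE_FALSE)
			return i;
	}
	return -1;
}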

void
ExecConstraints(ResultRelInfo *resultRelInfo,
				TupleTableSlot *slot, EState *estate)
{
	Relation	rel = resultRelInfo->ri_RelationDesc;
	TupleConstr *constr = rel->rd_att->constr;

	Assert(constr);

	if (constr->has_not_null)
	{
		int			natts = rel->rd_att->natts;
		int			attrChk;

		for (attrChk = 1; attrChk <= natts; attrChk++)
		{
			if (rel->rd_att->attrs[attrChk - 1]->attnotnull &&
				slot_attisnull(slot, attrChk))
				ereport(ERROR,
						(errcode(ERRCODE_NOT_NULL_VIOLATION),
						 errmsg("null value in column \"%s\" violates not-null constraint",
						NameStr(rel->rd_att->attrs[attrChk - 1]->attname))));
		}
	}

	if (constr->num_check > 0)
	{
		const char *failed;

		if ((failed = ExecRelCheck(resultRelInfo, slot, estate)) != NULL)
			ereport(ERROR,
					(errcode(ERRCODE_CHECK_VIOLATION),
					 errmsg("new row for relation \"%s\" violates check constraint \"%s\"",
							RelationGetRelationName(rel), failed)));
	}
}

/*
 * Check a modified tuple to see if we want to process its updated version
 * under READ COMMITTED rules.
 *
 * See backend/executor/README for some info about how this works.
 *
 *	estate - executor state data
 *	rti - rangetable index of table containing tuple
 *	*tid - t_ctid from the outdated tuple (ie, next updated version)
 *	priorXmax - t_xmax from the outdated tuple
 *	curCid - command ID of current command of my transaction
 *
 * *tid is also an output parameter: it's modified to hold the TID of the
 * latest version of the tuple (note this may be changed even on failure)
 *
 * Returns a slot containing the new candidate update/delete tuple, or
 * NULL if we determine we shouldn't process the row.
 */
TupleTableSlot *
EvalPlanQual(EState *estate, Index rti,
			 ItemPointer tid, TransactionId priorXmax, CommandId curCid)
{
	evalPlanQual *epq;
	EState	   *epqstate;
	Relation	relation;
	HeapTupleData tuple;
	HeapTuple	copyTuple = NULL;
	bool		endNode;

	Assert(rti != 0);

	/*
	 * find relation containing target tuple
	 */
	if (estate->es_result_relation_info != NULL &&
		estate->es_result_relation_info->ri_RangeTableIndex == rti)
		relation = estate->es_result_relation_info->ri_RelationDesc;
	else
	{
		ListCell   *l;

		relation = NULL;
		foreach(l, estate->es_rowMarks)
		{
			if (((ExecRowMark *) lfirst(l))->rti == rti)
			{
				relation = ((ExecRowMark *) lfirst(l))->relation;
				break;
			}
		}
		if (relation == NULL)
			elog(ERROR, "could not find RowMark for RT index %u", rti);
	}

	/*
	 * fetch tid tuple
	 *
	 * Loop here to deal with updated or busy tuples
	 */
	tuple.t_self = *tid;
	for (;;)
	{
		Buffer		buffer;

		if (heap_fetch(relation, SnapshotDirty, &tuple, &buffer, true, NULL))
		{
			/*
			 * If xmin isn't what we're expecting, the slot must have been
			 * recycled and reused for an unrelated tuple.  This implies that
			 * the latest version of the row was deleted, so we need do
			 * nothing.  (Should be safe to examine xmin without getting
			 * buffer's content lock, since xmin never changes in an existing
			 * tuple.)
			 */
			if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
									 priorXmax))
			{
				ReleaseBuffer(buffer);
				return NULL;
			}

			/* otherwise xmin should not be dirty... */
			if (TransactionIdIsValid(SnapshotDirty->xmin))
				elog(ERROR, "t_xmin is uncommitted in tuple to be updated");

			/*
			 * If the tuple is being updated by another transaction, we have
			 * to wait for its commit/abort.
			 */
			if (TransactionIdIsValid(SnapshotDirty->xmax))
			{
				ReleaseBuffer(buffer);
				XactLockTableWait(SnapshotDirty->xmax);
				continue;		/* loop back to repeat heap_fetch */
			}

			/*
			 * If tuple was inserted by our own transaction, we have to check
			 * cmin against curCid: cmin >= curCid means our command cannot
			 * see the tuple, so we should ignore it.  Without this we are
			 * open to the "Halloween problem" of indefinitely re-updating
			 * the same tuple.  (We need not check cmax because
			 * HeapTupleSatisfiesDirty will consider a tuple deleted by
			 * our transaction dead, regardless of cmax.)  We just checked
			 * that priorXmax == xmin, so we can test that variable instead
			 * of doing HeapTupleHeaderGetXmin again.
			 */
			if (TransactionIdIsCurrentTransactionId(priorXmax) &&
				HeapTupleHeaderGetCmin(tuple.t_data) >= curCid)
			{
				ReleaseBuffer(buffer);
				return NULL;
			}

			/*
			 * We got tuple - now copy it for use by recheck query.
			 */
			copyTuple = heap_copytuple(&tuple);
			ReleaseBuffer(buffer);
			break;
		}

		/*
		 * If the referenced slot was actually empty, the latest version of
		 * the row must have been deleted, so we need do nothing.
		 */
		if (tuple.t_data == NULL)
		{
			ReleaseBuffer(buffer);
			return NULL;
		}

		/*
		 * As above, if xmin isn't what we're expecting, do nothing.
		 */
		if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
								 priorXmax))
		{
			ReleaseBuffer(buffer);
			return NULL;
		}

		/*
		 * If we get here, the tuple was found but failed SnapshotDirty.
		 * Assuming the xmin is either a committed xact or our own xact (as it
		 * certainly should be if we're trying to modify the tuple), this must
		 * mean that the row was updated or deleted by either a committed xact
		 * or our own xact.  If it was deleted, we can ignore it; if it was
		 * updated then chain up to the next version and repeat the whole
		 * test.
		 *
		 * As above, it should be safe to examine xmax and t_ctid without the
		 * buffer content lock, because they can't be changing.
		 */
		if (ItemPointerEquals(&tuple.t_self, &tuple.t_data->t_ctid))
		{
			/* deleted, so forget about it */
			ReleaseBuffer(buffer);
			return NULL;
		}

		/* updated, so look at the updated row */
		tuple.t_self = tuple.t_data->t_ctid;
		/* updated row should have xmin matching this xmax */
		priorXmax = HeapTupleHeaderGetXmax(tuple.t_data);
		ReleaseBuffer(buffer);
		/* loop back to fetch next in chain */
	}

	/*
	 * For UPDATE/DELETE we have to return the tid of the actual row we're
	 * executing PQ for.
	 */
	*tid = tuple.t_self;

	/*
	 * Need to run a recheck subquery.  Find or create a PQ stack entry.
	 */
	epq = estate->es_evalPlanQual;
	endNode = true;

	if (epq != NULL && epq->rti == 0)
	{
		/* Top PQ stack entry is idle, so re-use it */
		Assert(!(estate->es_useEvalPlan) && epq->next == NULL);
		epq->rti = rti;
		endNode = false;
	}

	/*
	 * If this is a request for another RTE (call it Ra), we must check
	 * whether PlanQual was already requested for Ra.  If so, Ra's row was
	 * updated again, and we have to restart the old execution for Ra,
	 * discarding everything done since Ra was suspended.
	 */
	if (epq != NULL && epq->rti != rti &&
		epq->estate->es_evTuple[rti - 1] != NULL)
	{
		do
		{
			evalPlanQual *oldepq;

			/* stop execution */
			EvalPlanQualStop(epq);
			/* pop previous PlanQual from the stack */
			oldepq = epq->next;
			Assert(oldepq && oldepq->rti != 0);
			/* push current PQ to freePQ stack */
			oldepq->free = epq;
			epq = oldepq;
			estate->es_evalPlanQual = epq;
		} while (epq->rti != rti);
	}

	/*
	 * If the request is for another RTE, we have to suspend execution of the
	 * current PlanQual and start execution for the new one.
	 */
	if (epq == NULL || epq->rti != rti)
	{
		/* try to reuse plan used previously */
		evalPlanQual *newepq = (epq != NULL) ? epq->free : NULL;

		if (newepq == NULL)		/* first call or freePQ stack is empty */
		{
			newepq = (evalPlanQual *) palloc0(sizeof(evalPlanQual));
			newepq->free = NULL;
			newepq->estate = NULL;
			newepq->planstate = NULL;
		}
		else
		{
			/* recycle previously used PlanQual */
			Assert(newepq->estate == NULL);
			epq->free = NULL;
		}
		/* push current PQ to the stack */
		newepq->next = epq;
		epq = newepq;
		estate->es_evalPlanQual = epq;
		epq->rti = rti;
		endNode = false;
	}

	Assert(epq->rti == rti);

	/*
	 * Ok - we're requested for the same RTE.  Unfortunately we still have to
	 * end and restart execution of the plan, because ExecReScan wouldn't
	 * ensure that upper plan nodes would reset themselves.  We could make
	 * that work if insertion of the target tuple were integrated with the
	 * Param mechanism somehow, so that the upper plan nodes know that their
	 * children's outputs have changed.
	 *
	 * Note that the stack of free evalPlanQual nodes is quite useless at the
	 * moment, since it only saves us from pallocing/releasing the
	 * evalPlanQual nodes themselves.  But it will be useful once we implement
	 * ReScan instead of end/restart for re-using PlanQual nodes.
	 */
	if (endNode)
	{
		/* stop execution */
		EvalPlanQualStop(epq);
	}

	/*
	 * Initialize new recheck query.
	 *
	 * Note: if we were re-using PlanQual plans via ExecReScan, we'd need to
	 * instead copy down changeable state from the top plan (including
	 * es_result_relation_info, es_junkFilter) and reset locally changeable
	 * state in the epq (including es_param_exec_vals, es_evTupleNull).
	 */
	EvalPlanQualStart(epq, estate, epq->next);

	/*
	 * Free the old RTE's tuple, if any, and store the target tuple where the
	 * relation's scan node will see it.
	 */
	epqstate = epq->estate;
	if (epqstate->es_evTuple[rti - 1] != NULL)
		heap_freetuple(epqstate->es_evTuple[rti - 1]);
	epqstate->es_evTuple[rti - 1] = copyTuple;

	return EvalPlanQualNext(estate);
}
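
/*
 * Illustrative sketch only, not part of PostgreSQL: the update-chain walk
 * that EvalPlanQual performs above, restated over a toy in-memory version
 * list.  ExampleVersion and its fields are hypothetical stand-ins for heap
 * tuples: xmin/xmax mimic the creating/deleting transaction IDs and "next"
 * mimics t_ctid pointing at the successor version.
 */
typedef struct ExampleVersion
{
	unsigned int xmin;			/* transaction that created this version */
	unsigned int xmax;			/* transaction that replaced it, or 0 if live */
	struct ExampleVersion *next;	/* successor version, or NULL if none */
} ExampleVersion;

/*
 * Follow the chain from "start", expecting the first version to have been
 * created by priorXmax.  Returns the newest live version, or NULL if the
 * chain is broken or the row was deleted outright.
 */
static ExampleVersion *
example_follow_update_chain(ExampleVersion *start, unsigned int priorXmax)
{
	ExampleVersion *v;

	for (v = start; v != NULL; v = v->next)
	{
		if (v->xmin != priorXmax)
			return NULL;		/* chain broken: row vanished or tid was reused */
		if (v->xmax == 0)
			return v;			/* live version: this is the recheck candidate */
		if (v->next == NULL)
			return NULL;		/* deleted with no successor: nothing to do */
		priorXmax = v->xmax;	/* the successor must have been created by xmax */
	}
	return NULL;
}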

static TupleTableSlot *
EvalPlanQualNext(EState *estate)
{
	evalPlanQual *epq = estate->es_evalPlanQual;
	MemoryContext oldcontext;
	TupleTableSlot *slot;

	Assert(epq->rti != 0);

lpqnext:;
	oldcontext = MemoryContextSwitchTo(epq->estate->es_query_cxt);
	slot = ExecProcNode(epq->planstate);
	MemoryContextSwitchTo(oldcontext);

	/*
	 * No more tuples for this PQ. Continue previous one.
	 */
	if (TupIsNull(slot))
	{
		evalPlanQual *oldepq;

		/* stop execution */
		EvalPlanQualStop(epq);
		/* pop old PQ from the stack */
		oldepq = epq->next;
		if (oldepq == NULL)
		{
			/* this is the first (oldest) PQ - mark as free */
			epq->rti = 0;
			estate->es_useEvalPlan = false;
			/* and continue Query execution */
			return NULL;
		}
		Assert(oldepq->rti != 0);
		/* push current PQ to freePQ stack */
		oldepq->free = epq;
		epq = oldepq;
		estate->es_evalPlanQual = epq;
		goto lpqnext;
	}

	return slot;
}

static void
EndEvalPlanQual(EState *estate)
{
	evalPlanQual *epq = estate->es_evalPlanQual;

	if (epq->rti == 0)			/* plans already shut down */
	{
		Assert(epq->next == NULL);
		return;
	}

	for (;;)
	{
		evalPlanQual *oldepq;

		/* stop execution */
		EvalPlanQualStop(epq);
		/* pop old PQ from the stack */
		oldepq = epq->next;
		if (oldepq == NULL)
		{
			/* this is the first (oldest) PQ - mark as free */
			epq->rti = 0;
			estate->es_useEvalPlan = false;
			break;
		}
		Assert(oldepq->rti != 0);
		/* push current PQ to freePQ stack */
		oldepq->free = epq;
		epq = oldepq;
		estate->es_evalPlanQual = epq;
	}
}
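
/*
 * Illustrative sketch only, not part of PostgreSQL: the stack-plus-free-list
 * bookkeeping used for evalPlanQual entries by EvalPlanQual, EvalPlanQualNext
 * and EndEvalPlanQual above.  Finished entries are not released; they are
 * parked on the surviving entry's "free" list so the next push can recycle
 * them, and the oldest entry is simply marked idle (rti == 0).  ExamplePQ,
 * example_pq_push and example_pq_pop are hypothetical names.
 */
typedef struct ExamplePQ
{
	int			rti;			/* 0 means "idle" */
	struct ExamplePQ *next;		/* older active entry, if any */
	struct ExamplePQ *free;		/* parked entry available for reuse */
} ExamplePQ;

/* Push an entry for "rti"; reuse an idle top or a parked entry if possible. */
static ExamplePQ *
example_pq_push(ExamplePQ *top, int rti)
{
	ExamplePQ  *newtop;

	if (top != NULL && top->rti == 0)
	{
		top->rti = rti;			/* idle top entry: just reactivate it */
		return top;
	}
	newtop = (top != NULL) ? top->free : NULL;
	if (newtop == NULL)
		newtop = (ExamplePQ *) palloc0(sizeof(ExamplePQ));
	else
	{
		top->free = NULL;		/* unhook the parked entry we are recycling */
		newtop->free = NULL;
	}
	newtop->rti = rti;
	newtop->next = top;
	return newtop;
}

/* Pop the top entry; it is parked (or marked idle), never released. */
static ExamplePQ *
example_pq_pop(ExamplePQ *top)
{
	ExamplePQ  *older = top->next;

	if (older == NULL)
	{
		top->rti = 0;			/* oldest entry: leave it in place, idle */
		return top;
	}
	older->free = top;			/* park the finished entry for later reuse */
	return older;
}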

/*
 * Start execution of one level of PlanQual.
 *
 * This is a cut-down version of ExecutorStart(): we copy some state from
 * the top-level estate rather than initializing it fresh.
 */
static void
EvalPlanQualStart(evalPlanQual *epq, EState *estate, evalPlanQual *priorepq)
{
	EState	   *epqstate;
	int			rtsize;
	MemoryContext oldcontext;

	rtsize = list_length(estate->es_range_table);

	epq->estate = epqstate = CreateExecutorState();

	oldcontext = MemoryContextSwitchTo(epqstate->es_query_cxt);

	/*
	 * The epqstates share the top query's copy of unchanging state such as
	 * the snapshot, rangetable, result-rel info, and external Param info.
	 * They need their own copies of local state, including a tuple table,
	 * es_param_exec_vals, etc.
	 */
	epqstate->es_direction = ForwardScanDirection;
	epqstate->es_snapshot = estate->es_snapshot;
	epqstate->es_crosscheck_snapshot = estate->es_crosscheck_snapshot;
	epqstate->es_range_table = estate->es_range_table;
	epqstate->es_result_relations = estate->es_result_relations;
	epqstate->es_num_result_relations = estate->es_num_result_relations;
	epqstate->es_result_relation_info = estate->es_result_relation_info;
	epqstate->es_junkFilter = estate->es_junkFilter;
	epqstate->es_into_relation_descriptor = estate->es_into_relation_descriptor;
	epqstate->es_into_relation_use_wal = estate->es_into_relation_use_wal;
	epqstate->es_param_list_info = estate->es_param_list_info;
	if (estate->es_topPlan->nParamExec > 0)
		epqstate->es_param_exec_vals = (ParamExecData *)
			palloc0(estate->es_topPlan->nParamExec * sizeof(ParamExecData));
	epqstate->es_rowMarks = estate->es_rowMarks;
	epqstate->es_forUpdate = estate->es_forUpdate;
	epqstate->es_rowNoWait = estate->es_rowNoWait;
	epqstate->es_instrument = estate->es_instrument;
	epqstate->es_select_into = estate->es_select_into;
	epqstate->es_into_oids = estate->es_into_oids;
	epqstate->es_topPlan = estate->es_topPlan;

	/*
	 * Each epqstate must have its own es_evTupleNull state, but all the stack
	 * entries share es_evTuple state.  This allows sub-rechecks to inherit
	 * the value being examined by an outer recheck.
	 */
	epqstate->es_evTupleNull = (bool *) palloc0(rtsize * sizeof(bool));
	if (priorepq == NULL)
		/* first PQ stack entry */
		epqstate->es_evTuple = (HeapTuple *)
			palloc0(rtsize * sizeof(HeapTuple));
	else
		/* later stack entries share the same storage */
		epqstate->es_evTuple = priorepq->estate->es_evTuple;

	epqstate->es_tupleTable =
		ExecCreateTupleTable(estate->es_tupleTable->size);

	epq->planstate = ExecInitNode(estate->es_topPlan, epqstate);

	MemoryContextSwitchTo(oldcontext);
}

/*
 * End execution of one level of PlanQual.
 *
 * This is a cut-down version of ExecutorEnd(); basically we want to do most
 * of the normal cleanup, but *not* close result relations (which we are
 * just sharing from the outer query).
 */
static void
EvalPlanQualStop(evalPlanQual *epq)
{
	EState	   *epqstate = epq->estate;
	MemoryContext oldcontext;

	oldcontext = MemoryContextSwitchTo(epqstate->es_query_cxt);

	ExecEndNode(epq->planstate);

	ExecDropTupleTable(epqstate->es_tupleTable, true);
	epqstate->es_tupleTable = NULL;

	if (epqstate->es_evTuple[epq->rti - 1] != NULL)
	{
		heap_freetuple(epqstate->es_evTuple[epq->rti - 1]);
		epqstate->es_evTuple[epq->rti - 1] = NULL;
	}

	MemoryContextSwitchTo(oldcontext);

	FreeExecutorState(epqstate);

	epq->estate = NULL;
	epq->planstate = NULL;
}