/*-------------------------------------------------------------------------
 *
 * execMain.c
 *	  top level executor interface routines
 *
 * INTERFACE ROUTINES
 *	ExecutorStart()
 *	ExecutorRun()
 *	ExecutorEnd()
 *
 *	The old ExecutorMain() has been replaced by ExecutorStart(),
 *	ExecutorRun() and ExecutorEnd()
 *
 *	These three procedures are the external interfaces to the executor.
 *	In each case, the query descriptor is required as an argument.
 *
 *	ExecutorStart() must be called at the beginning of execution of any
 *	query plan and ExecutorEnd() should always be called at the end of
 *	execution of a plan.
 *
 *	ExecutorRun accepts direction and count arguments that specify whether
 *	the plan is to be executed forwards or backwards, and for how many
 *	tuples.
 *
 * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/backend/executor/execMain.c,v 1.287 2007/02/20 17:32:14 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/heapam.h"
#include "access/reloptions.h"
#include "access/transam.h"
#include "access/xact.h"
#include "catalog/heap.h"
#include "catalog/namespace.h"
#include "catalog/toasting.h"
#include "commands/tablespace.h"
#include "commands/trigger.h"
#include "executor/execdebug.h"
#include "executor/instrument.h"
#include "executor/nodeSubplan.h"
#include "miscadmin.h"
#include "optimizer/clauses.h"
#include "parser/parse_clause.h"
#include "parser/parsetree.h"
#include "storage/smgr.h"
#include "utils/acl.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"


typedef struct evalPlanQual
{
	Index		rti;
	EState	   *estate;
	PlanState  *planstate;
	struct evalPlanQual *next;	/* stack of active PlanQual plans */
	struct evalPlanQual *free;	/* list of free PlanQual plans */
} evalPlanQual;
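
/*
 * Illustrative note (not from the original file): entries of this struct
 * are stacked on estate->es_evalPlanQual as EvalPlanQual rechecks follow
 * chains of concurrently-updated tuples; the "free" list recycles
 * no-longer-active entries so nested rechecks can avoid repeated
 * allocation.
 */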

/* decls for local routines only used within this module */
static void InitPlan(QueryDesc *queryDesc, int eflags);
static void initResultRelInfo(ResultRelInfo *resultRelInfo,
				  Index resultRelationIndex,
				  List *rangeTable,
				  CmdType operation,
				  bool doInstrument);
static TupleTableSlot *ExecutePlan(EState *estate, PlanState *planstate,
			CmdType operation,
			long numberTuples,
			ScanDirection direction,
			DestReceiver *dest);
static void ExecSelect(TupleTableSlot *slot,
		   DestReceiver *dest, EState *estate);
static void ExecInsert(TupleTableSlot *slot, ItemPointer tupleid,
		   TupleTableSlot *planSlot,
		   DestReceiver *dest, EState *estate);
static void ExecDelete(ItemPointer tupleid,
		   TupleTableSlot *planSlot,
		   DestReceiver *dest, EState *estate);
static void ExecUpdate(TupleTableSlot *slot, ItemPointer tupleid,
		   TupleTableSlot *planSlot,
		   DestReceiver *dest, EState *estate);
static void ExecProcessReturning(ProjectionInfo *projectReturning,
					 TupleTableSlot *tupleSlot,
					 TupleTableSlot *planSlot,
					 DestReceiver *dest);
static TupleTableSlot *EvalPlanQualNext(EState *estate);
static void EndEvalPlanQual(EState *estate);
static void ExecCheckRTEPerms(RangeTblEntry *rte);
static void ExecCheckXactReadOnly(PlannedStmt *plannedstmt);
static void ExecCheckRangeTblReadOnly(List *rtable);
static void EvalPlanQualStart(evalPlanQual *epq, EState *estate,
				  evalPlanQual *priorepq);
static void EvalPlanQualStop(evalPlanQual *epq);
static void OpenIntoRel(QueryDesc *queryDesc);
static void CloseIntoRel(QueryDesc *queryDesc);
static void intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo);
static void intorel_receive(TupleTableSlot *slot, DestReceiver *self);
static void intorel_shutdown(DestReceiver *self);
static void intorel_destroy(DestReceiver *self);

/* end of local decls */


/* ----------------------------------------------------------------
 *		ExecutorStart
 *
 *		This routine must be called at the beginning of any execution of any
 *		query plan
 *
 * Takes a QueryDesc previously created by CreateQueryDesc (it's not real
 * clear why we bother to separate the two functions, but...).	The tupDesc
 * field of the QueryDesc is filled in to describe the tuples that will be
 * returned, and the internal fields (estate and planstate) are set up.
 *
 * eflags contains flag bits as described in executor.h.
 *
 * NB: the CurrentMemoryContext when this is called will become the parent
 * of the per-query context used for this Executor invocation.
 * ----------------------------------------------------------------
 */
void
ExecutorStart(QueryDesc *queryDesc, int eflags)
{
	EState	   *estate;
	MemoryContext oldcontext;

	/* sanity checks: queryDesc must not be started already */
	Assert(queryDesc != NULL);
	Assert(queryDesc->estate == NULL);

	/*
	 * If the transaction is read-only, we need to check if any writes are
	 * planned to non-temporary tables.  EXPLAIN is considered read-only.
	 */
	if (XactReadOnly && !(eflags & EXEC_FLAG_EXPLAIN_ONLY))
		ExecCheckXactReadOnly(queryDesc->plannedstmt);

	/*
	 * Build EState, switch into per-query memory context for startup.
	 */
	estate = CreateExecutorState();
	queryDesc->estate = estate;

	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	/*
	 * Fill in parameters, if any, from queryDesc
	 */
	estate->es_param_list_info = queryDesc->params;

	if (queryDesc->plannedstmt->nParamExec > 0)
		estate->es_param_exec_vals = (ParamExecData *)
			palloc0(queryDesc->plannedstmt->nParamExec * sizeof(ParamExecData));

	/*
	 * Copy other important information into the EState
	 */
	estate->es_snapshot = queryDesc->snapshot;
	estate->es_crosscheck_snapshot = queryDesc->crosscheck_snapshot;
	estate->es_instrument = queryDesc->doInstrument;

	/*
	 * Initialize the plan state tree
	 */
	InitPlan(queryDesc, eflags);

	MemoryContextSwitchTo(oldcontext);
}

/* ----------------------------------------------------------------
 *		ExecutorRun
 *
 *		This is the main routine of the executor module. It accepts
 *		the query descriptor from the traffic cop and executes the
 *		query plan.
 *
 *		ExecutorStart must have been called already.
 *
 *		If direction is NoMovementScanDirection then nothing is done
 *		except to start up/shut down the destination.  Otherwise,
 *		we retrieve up to 'count' tuples in the specified direction.
 *
 *		Note: count = 0 is interpreted as no portal limit, i.e., run to
 *		completion.
 *
 * ----------------------------------------------------------------
 */
TupleTableSlot *
ExecutorRun(QueryDesc *queryDesc,
			ScanDirection direction, long count)
{
	EState	   *estate;
	CmdType		operation;
	DestReceiver *dest;
	bool		sendTuples;
	TupleTableSlot *result;
	MemoryContext oldcontext;

	/* sanity checks */
	Assert(queryDesc != NULL);

	estate = queryDesc->estate;

	Assert(estate != NULL);

	/*
	 * Switch into per-query memory context
	 */
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	/*
	 * extract information from the query descriptor
	 */
	operation = queryDesc->operation;
	dest = queryDesc->dest;

	/*
	 * startup tuple receiver, if we will be emitting tuples
	 */
	estate->es_processed = 0;
	estate->es_lastoid = InvalidOid;

	sendTuples = (operation == CMD_SELECT ||
				  queryDesc->plannedstmt->returningLists);

	if (sendTuples)
		(*dest->rStartup) (dest, operation, queryDesc->tupDesc);

	/*
	 * run plan
	 */
	if (ScanDirectionIsNoMovement(direction))
		result = NULL;
	else
		result = ExecutePlan(estate,
							 queryDesc->planstate,
							 operation,
							 count,
							 direction,
							 dest);

	/*
	 * shutdown tuple receiver, if we started it
	 */
	if (sendTuples)
		(*dest->rShutdown) (dest);

	MemoryContextSwitchTo(oldcontext);

	return result;
}

/* ----------------------------------------------------------------
 *		ExecutorEnd
 *
 *		This routine must be called at the end of execution of any
 *		query plan
 * ----------------------------------------------------------------
 */
void
ExecutorEnd(QueryDesc *queryDesc)
{
	EState	   *estate;
	MemoryContext oldcontext;

	/* sanity checks */
	Assert(queryDesc != NULL);

	estate = queryDesc->estate;

	Assert(estate != NULL);

	/*
	 * Switch into per-query memory context to run ExecEndPlan
	 */
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	ExecEndPlan(queryDesc->planstate, estate);

	/*
	 * Close the SELECT INTO relation if any
	 */
	if (estate->es_select_into)
		CloseIntoRel(queryDesc);

	/*
	 * Must switch out of context before destroying it
	 */
	MemoryContextSwitchTo(oldcontext);

	/*
	 * Release EState and per-query memory context.  This should release
	 * everything the executor has allocated.
	 */
	FreeExecutorState(estate);

	/* Reset queryDesc fields that no longer point to anything */
	queryDesc->tupDesc = NULL;
	queryDesc->estate = NULL;
	queryDesc->planstate = NULL;
}

/* ----------------------------------------------------------------
 *		ExecutorRewind
 *
 *		This routine may be called on an open queryDesc to rewind it
 *		to the start.
 * ----------------------------------------------------------------
 */
void
ExecutorRewind(QueryDesc *queryDesc)
{
	EState	   *estate;
	MemoryContext oldcontext;

	/* sanity checks */
	Assert(queryDesc != NULL);

	estate = queryDesc->estate;

	Assert(estate != NULL);

	/* It's probably not sensible to rescan updating queries */
	Assert(queryDesc->operation == CMD_SELECT);

	/*
	 * Switch into per-query memory context
	 */
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	/*
	 * rescan plan
	 */
	ExecReScan(queryDesc->planstate, NULL);

	MemoryContextSwitchTo(oldcontext);
}
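
#ifdef NOT_USED
/*
 * Illustrative sketch only, not part of the original file: the typical
 * caller lifecycle for the routines above.  The QueryDesc is assumed to
 * have been built elsewhere with CreateQueryDesc(); portal bookkeeping
 * and error cleanup are omitted.
 */
static void
ExampleExecutorLifecycle(QueryDesc *qdesc)
{
	/* One-time setup: builds the EState and the PlanState tree */
	ExecutorStart(qdesc, 0);

	/* Fetch all tuples; count = 0 means run to completion */
	(void) ExecutorRun(qdesc, ForwardScanDirection, 0L);

	/* A SELECT may be rewound and run again from the start */
	ExecutorRewind(qdesc);
	(void) ExecutorRun(qdesc, ForwardScanDirection, 0L);

	/* Tear down the plan and release the per-query memory context */
	ExecutorEnd(qdesc);
}
#endif   /* NOT_USED */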

/*
 * ExecCheckRTPerms
 *		Check access permissions for all relations listed in a range table.
 */
void
ExecCheckRTPerms(List *rangeTable)
{
	ListCell   *l;

	foreach(l, rangeTable)
	{
		RangeTblEntry *rte = lfirst(l);

		ExecCheckRTEPerms(rte);
	}
}

/*
 * ExecCheckRTEPerms
 *		Check access permissions for a single RTE.
 */
static void
ExecCheckRTEPerms(RangeTblEntry *rte)
{
	AclMode		requiredPerms;
	Oid			relOid;
	Oid			userid;

	/*
	 * Only plain-relation RTEs need to be checked here.  Subquery RTEs are
	 * checked by ExecInitSubqueryScan if the subquery is still a separate
	 * subquery --- if it's been pulled up into our query level then the RTEs
	 * are in our rangetable and will be checked here. Function RTEs are
	 * checked by init_fcache when the function is prepared for execution.
	 * Join and special RTEs need no checks.
	 */
	if (rte->rtekind != RTE_RELATION)
		return;

	/*
	 * No work if requiredPerms is empty.
	 */
	requiredPerms = rte->requiredPerms;
	if (requiredPerms == 0)
		return;

	relOid = rte->relid;

	/*
	 * userid to check as: current user unless we have a setuid indication.
	 *
	 * Note: GetUserId() is presently fast enough that there's no harm in
	 * calling it separately for each RTE.	If that stops being true, we could
	 * call it once in ExecCheckRTPerms and pass the userid down from there.
	 * But for now, no need for the extra clutter.
	 */
	userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();

	/*
	 * We must have *all* the requiredPerms bits, so use aclmask not aclcheck.
	 */
	if (pg_class_aclmask(relOid, userid, requiredPerms, ACLMASK_ALL)
		!= requiredPerms)
		aclcheck_error(ACLCHECK_NO_PRIV, ACL_KIND_CLASS,
					   get_rel_name(relOid));
}
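
/*
 * Illustrative example (not from the original file): for a statement such
 * as "UPDATE t SET a = a + 1", the parser marks t's RTE with
 * requiredPerms = ACL_SELECT | ACL_UPDATE, since the old value of "a" is
 * read as well as written; the aclmask call above must return both bits
 * or the command is rejected.
 */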

/*
 * Check that the query does not imply any writes to non-temp tables.
 */
static void
ExecCheckXactReadOnly(PlannedStmt *plannedstmt)
{
	/*
	 * CREATE TABLE AS or SELECT INTO?
	 *
	 * XXX should we allow this if the destination is temp?
	 */
	if (plannedstmt->into != NULL)
		goto fail;

	/* Fail if write permissions are requested on any non-temp table */
	ExecCheckRangeTblReadOnly(plannedstmt->rtable);

	return;

fail:
	ereport(ERROR,
			(errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION),
			 errmsg("transaction is read-only")));
}

static void
ExecCheckRangeTblReadOnly(List *rtable)
{
	ListCell   *l;

	/* Fail if write permissions are requested on any non-temp table */
	foreach(l, rtable)
	{
		RangeTblEntry *rte = lfirst(l);

		if (rte->rtekind == RTE_SUBQUERY)
		{
			Assert(!rte->subquery->into);
			ExecCheckRangeTblReadOnly(rte->subquery->rtable);
			continue;
		}

		if (rte->rtekind != RTE_RELATION)
			continue;

		if ((rte->requiredPerms & (~ACL_SELECT)) == 0)
			continue;

		if (isTempNamespace(get_rel_namespace(rte->relid)))
			continue;

		goto fail;
	}

	return;

fail:
	ereport(ERROR,
			(errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION),
			 errmsg("transaction is read-only")));
}
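
/*
 * Illustrative example (not from the original file): in a transaction run
 * with SET TRANSACTION READ ONLY, "SELECT * FROM t" passes the checks
 * above (only ACL_SELECT is required), "DELETE FROM t" fails unless t is
 * a temp table, and CREATE TABLE AS / SELECT INTO always fails, per the
 * XXX comment above.
 */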


/* ----------------------------------------------------------------
 *		InitPlan
 *
 *		Initializes the query plan: open files, allocate storage
 *		and start up the rule manager
 * ----------------------------------------------------------------
 */
static void
InitPlan(QueryDesc *queryDesc, int eflags)
{
	CmdType		operation = queryDesc->operation;
	PlannedStmt *plannedstmt = queryDesc->plannedstmt;
	Plan	   *plan = plannedstmt->planTree;
	List	   *rangeTable = plannedstmt->rtable;
	EState	   *estate = queryDesc->estate;
	PlanState  *planstate;
	TupleDesc	tupType;
	ListCell   *l;

	/*
	 * Do permissions checks.  It's sufficient to examine the query's top
	 * rangetable here --- subplan RTEs will be checked during
	 * ExecInitSubPlan().
	 */
	ExecCheckRTPerms(rangeTable);

	/*
	 * initialize the node's execution state
	 */
	estate->es_range_table = rangeTable;

	/*
	 * initialize result relation stuff
	 */
	if (plannedstmt->resultRelations)
	{
		List	   *resultRelations = plannedstmt->resultRelations;
		int			numResultRelations = list_length(resultRelations);
		ResultRelInfo *resultRelInfos;
		ResultRelInfo *resultRelInfo;

		resultRelInfos = (ResultRelInfo *)
			palloc(numResultRelations * sizeof(ResultRelInfo));
		resultRelInfo = resultRelInfos;
		foreach(l, resultRelations)
		{
			initResultRelInfo(resultRelInfo,
							  lfirst_int(l),
							  rangeTable,
							  operation,
							  estate->es_instrument);
			resultRelInfo++;
		}
		estate->es_result_relations = resultRelInfos;
		estate->es_num_result_relations = numResultRelations;
		/* Initialize to first or only result rel */
		estate->es_result_relation_info = resultRelInfos;
	}
	else
	{
		/*
		 * if no result relation, then set state appropriately
		 */
		estate->es_result_relations = NULL;
		estate->es_num_result_relations = 0;
		estate->es_result_relation_info = NULL;
	}

	/*
	 * Detect whether we're doing SELECT INTO.  If so, set the es_into_oids
	 * flag appropriately so that the plan tree will be initialized with the
	 * correct tuple descriptors.  (Other SELECT INTO stuff comes later.)
	 */
	estate->es_select_into = false;
	if (operation == CMD_SELECT && plannedstmt->into != NULL)
	{
		estate->es_select_into = true;
		estate->es_into_oids = interpretOidsOption(plannedstmt->into->options);
	}

	/*
	 * Have to lock relations selected FOR UPDATE/FOR SHARE before we
	 * initialize the plan tree, else we'd be doing a lock upgrade.
	 * While we are at it, build the ExecRowMark list.
	 */
	estate->es_rowMarks = NIL;
	foreach(l, plannedstmt->rowMarks)
	{
		RowMarkClause *rc = (RowMarkClause *) lfirst(l);
		Oid			relid = getrelid(rc->rti, rangeTable);
		Relation	relation;
		ExecRowMark *erm;

		relation = heap_open(relid, RowShareLock);
		erm = (ExecRowMark *) palloc(sizeof(ExecRowMark));
		erm->relation = relation;
		erm->rti = rc->rti;
		erm->forUpdate = rc->forUpdate;
		erm->noWait = rc->noWait;
		/* We'll set up ctidAttno below */
		erm->ctidAttNo = InvalidAttrNumber;
		estate->es_rowMarks = lappend(estate->es_rowMarks, erm);
	}

	/*
	 * initialize the executor "tuple" table.  We need slots for all the plan
	 * nodes, plus possibly output slots for the junkfilter(s). At this point
	 * we aren't sure if we need junkfilters, so just add slots for them
	 * unconditionally.  Also, if it's not a SELECT, set up a slot for use for
	 * trigger output tuples.
	 */
	{
		int			nSlots = ExecCountSlotsNode(plan);

		if (plannedstmt->resultRelations != NIL)
			nSlots += list_length(plannedstmt->resultRelations);
		else
			nSlots += 1;
		if (operation != CMD_SELECT)
			nSlots++;			/* for es_trig_tuple_slot */
		if (plannedstmt->returningLists)
			nSlots++;			/* for RETURNING projection */

		estate->es_tupleTable = ExecCreateTupleTable(nSlots);

		if (operation != CMD_SELECT)
			estate->es_trig_tuple_slot =
				ExecAllocTableSlot(estate->es_tupleTable);
	}

	/* mark EvalPlanQual not active */
	estate->es_plannedstmt = plannedstmt;
	estate->es_evalPlanQual = NULL;
	estate->es_evTupleNull = NULL;
	estate->es_evTuple = NULL;
	estate->es_useEvalPlan = false;

	/*
	 * initialize the private state information for all the nodes in the query
	 * tree.  This opens files, allocates storage and leaves us ready to start
	 * processing tuples.
	 */
	planstate = ExecInitNode(plan, estate, eflags);

	/*
	 * Get the tuple descriptor describing the type of tuples to return. (this
	 * is especially important if we are creating a relation with "SELECT
	 * INTO")
	 */
	tupType = ExecGetResultType(planstate);

	/*
	 * Initialize the junk filter if needed.  SELECT and INSERT queries need a
	 * filter if there are any junk attrs in the tlist.  INSERT and SELECT
	 * INTO also need a filter if the plan may return raw disk tuples (else
	 * heap_insert will be scribbling on the source relation!). UPDATE and
	 * DELETE always need a filter, since there's always a junk 'ctid'
	 * attribute present --- no need to look first.
	 */
	{
		bool		junk_filter_needed = false;
		ListCell   *tlist;

		switch (operation)
		{
			case CMD_SELECT:
			case CMD_INSERT:
				foreach(tlist, plan->targetlist)
				{
					TargetEntry *tle = (TargetEntry *) lfirst(tlist);

					if (tle->resjunk)
					{
						junk_filter_needed = true;
						break;
					}
				}
				if (!junk_filter_needed &&
					(operation == CMD_INSERT || estate->es_select_into) &&
					ExecMayReturnRawTuples(planstate))
					junk_filter_needed = true;
				break;
			case CMD_UPDATE:
			case CMD_DELETE:
				junk_filter_needed = true;
				break;
			default:
				break;
		}

		if (junk_filter_needed)
		{
			/*
			 * If there are multiple result relations, each one needs its own
			 * junk filter.  Note this is only possible for UPDATE/DELETE, so
			 * we can't be fooled by some needing a filter and some not.
			 */
			if (list_length(plannedstmt->resultRelations) > 1)
			{
				PlanState **appendplans;
				int			as_nplans;
				ResultRelInfo *resultRelInfo;
				int			i;

				/* Top plan had better be an Append here. */
				Assert(IsA(plan, Append));
				Assert(((Append *) plan)->isTarget);
				Assert(IsA(planstate, AppendState));
				appendplans = ((AppendState *) planstate)->appendplans;
				as_nplans = ((AppendState *) planstate)->as_nplans;
				Assert(as_nplans == estate->es_num_result_relations);
				resultRelInfo = estate->es_result_relations;
				for (i = 0; i < as_nplans; i++)
				{
					PlanState  *subplan = appendplans[i];
					JunkFilter *j;

					j = ExecInitJunkFilter(subplan->plan->targetlist,
							resultRelInfo->ri_RelationDesc->rd_att->tdhasoid,
								  ExecAllocTableSlot(estate->es_tupleTable));
					/*
					 * Since it must be UPDATE/DELETE, there had better be
					 * a "ctid" junk attribute in the tlist ... but ctid could
					 * be at a different resno for each result relation.
					 * We look up the ctid resnos now and save them in the
					 * junkfilters.
					 */
					j->jf_junkAttNo = ExecFindJunkAttribute(j, "ctid");
					if (!AttributeNumberIsValid(j->jf_junkAttNo))
						elog(ERROR, "could not find junk ctid column");
					resultRelInfo->ri_junkFilter = j;
					resultRelInfo++;
				}

				/*
				 * Set active junkfilter too; at this point ExecInitAppend has
				 * already selected an active result relation...
				 */
				estate->es_junkFilter =
					estate->es_result_relation_info->ri_junkFilter;
			}
			else
			{
				/* Normal case with just one JunkFilter */
				JunkFilter *j;

				j = ExecInitJunkFilter(planstate->plan->targetlist,
									   tupType->tdhasoid,
								  ExecAllocTableSlot(estate->es_tupleTable));
				estate->es_junkFilter = j;
				if (estate->es_result_relation_info)
					estate->es_result_relation_info->ri_junkFilter = j;

				if (operation == CMD_SELECT)
				{
					/* For SELECT, want to return the cleaned tuple type */
					tupType = j->jf_cleanTupType;
					/* For SELECT FOR UPDATE/SHARE, find the ctid attrs now */
					foreach(l, estate->es_rowMarks)
					{
						ExecRowMark *erm = (ExecRowMark *) lfirst(l);
						char		resname[32];

						snprintf(resname, sizeof(resname), "ctid%u", erm->rti);
						erm->ctidAttNo = ExecFindJunkAttribute(j, resname);
						if (!AttributeNumberIsValid(erm->ctidAttNo))
							elog(ERROR, "could not find junk \"%s\" column",
								 resname);
					}
				}
				else if (operation == CMD_UPDATE || operation == CMD_DELETE)
				{
					/* For UPDATE/DELETE, find the ctid junk attr now */
					j->jf_junkAttNo = ExecFindJunkAttribute(j, "ctid");
					if (!AttributeNumberIsValid(j->jf_junkAttNo))
						elog(ERROR, "could not find junk ctid column");
				}
			}
		}
		else
			estate->es_junkFilter = NULL;
	}

	/*
	 * Initialize RETURNING projections if needed.
	 */
	if (plannedstmt->returningLists)
	{
		TupleTableSlot *slot;
		ExprContext *econtext;
		ResultRelInfo *resultRelInfo;

		/*
		 * We set QueryDesc.tupDesc to be the RETURNING rowtype in this case.
		 * We assume all the sublists will generate the same output tupdesc.
		 */
		tupType = ExecTypeFromTL((List *) linitial(plannedstmt->returningLists),
								 false);

		/* Set up a slot for the output of the RETURNING projection(s) */
		slot = ExecAllocTableSlot(estate->es_tupleTable);
		ExecSetSlotDescriptor(slot, tupType);
		/* Need an econtext too */
		econtext = CreateExprContext(estate);

		/*
		 * Build a projection for each result rel.	Note that any SubPlans in
		 * the RETURNING lists get attached to the topmost plan node.
		 */
		Assert(list_length(plannedstmt->returningLists) == estate->es_num_result_relations);
		resultRelInfo = estate->es_result_relations;
		foreach(l, plannedstmt->returningLists)
		{
			List	   *rlist = (List *) lfirst(l);
			List	   *rliststate;

			rliststate = (List *) ExecInitExpr((Expr *) rlist, planstate);
			resultRelInfo->ri_projectReturning =
				ExecBuildProjectionInfo(rliststate, econtext, slot,
									   resultRelInfo->ri_RelationDesc->rd_att);
			resultRelInfo++;
		}

		/*
		 * Because we already ran ExecInitNode() for the top plan node, any
		 * subplans we just attached to it won't have been initialized; so we
		 * have to do it here.	(Ugly, but the alternatives seem worse.)
		 */
		foreach(l, planstate->subPlan)
		{
			SubPlanState *sstate = (SubPlanState *) lfirst(l);

			Assert(IsA(sstate, SubPlanState));
			if (sstate->planstate == NULL)		/* already inited? */
				ExecInitSubPlan(sstate, estate, eflags);
		}
	}

	queryDesc->tupDesc = tupType;
	queryDesc->planstate = planstate;

	/*
	 * If doing SELECT INTO, initialize the "into" relation.  We must wait
	 * till now so we have the "clean" result tuple type to create the new
	 * table from.
	 *
	 * If EXPLAIN, skip creating the "into" relation.
	 */
	if (estate->es_select_into && !(eflags & EXEC_FLAG_EXPLAIN_ONLY))
		OpenIntoRel(queryDesc);
}

/*
 * Initialize ResultRelInfo data for one result relation
 */
static void
initResultRelInfo(ResultRelInfo *resultRelInfo,
				  Index resultRelationIndex,
				  List *rangeTable,
				  CmdType operation,
				  bool doInstrument)
{
	Oid			resultRelationOid;
	Relation	resultRelationDesc;

	resultRelationOid = getrelid(resultRelationIndex, rangeTable);
	resultRelationDesc = heap_open(resultRelationOid, RowExclusiveLock);

	switch (resultRelationDesc->rd_rel->relkind)
	{
		case RELKIND_SEQUENCE:
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot change sequence \"%s\"",
							RelationGetRelationName(resultRelationDesc))));
			break;
		case RELKIND_TOASTVALUE:
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot change TOAST relation \"%s\"",
							RelationGetRelationName(resultRelationDesc))));
			break;
		case RELKIND_VIEW:
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot change view \"%s\"",
							RelationGetRelationName(resultRelationDesc))));
			break;
	}

	MemSet(resultRelInfo, 0, sizeof(ResultRelInfo));
	resultRelInfo->type = T_ResultRelInfo;
	resultRelInfo->ri_RangeTableIndex = resultRelationIndex;
	resultRelInfo->ri_RelationDesc = resultRelationDesc;
	resultRelInfo->ri_NumIndices = 0;
	resultRelInfo->ri_IndexRelationDescs = NULL;
	resultRelInfo->ri_IndexRelationInfo = NULL;
	/* make a copy so as not to depend on relcache info not changing... */
	resultRelInfo->ri_TrigDesc = CopyTriggerDesc(resultRelationDesc->trigdesc);
	if (resultRelInfo->ri_TrigDesc)
	{
		int			n = resultRelInfo->ri_TrigDesc->numtriggers;

		resultRelInfo->ri_TrigFunctions = (FmgrInfo *)
			palloc0(n * sizeof(FmgrInfo));
		if (doInstrument)
			resultRelInfo->ri_TrigInstrument = InstrAlloc(n);
		else
			resultRelInfo->ri_TrigInstrument = NULL;
	}
	else
	{
		resultRelInfo->ri_TrigFunctions = NULL;
		resultRelInfo->ri_TrigInstrument = NULL;
	}
	resultRelInfo->ri_ConstraintExprs = NULL;
	resultRelInfo->ri_junkFilter = NULL;
	resultRelInfo->ri_projectReturning = NULL;

	/*
	 * If there are indices on the result relation, open them and save
	 * descriptors in the result relation info, so that we can add new index
	 * entries for the tuples we add/update.  We need not do this for a
	 * DELETE, however, since deletion doesn't affect indexes.
	 */
	if (resultRelationDesc->rd_rel->relhasindex &&
		operation != CMD_DELETE)
		ExecOpenIndices(resultRelInfo);
}

/*
 *		ExecContextForcesOids
 *
 * This is pretty grotty: when doing INSERT, UPDATE, or SELECT INTO,
 * we need to ensure that result tuples have space for an OID iff they are
 * going to be stored into a relation that has OIDs.  In other contexts
 * we are free to choose whether to leave space for OIDs in result tuples
 * (we generally don't want to, but we do if a physical-tlist optimization
 * is possible).  This routine checks the plan context and returns TRUE if the
 * choice is forced, FALSE if the choice is not forced.  In the TRUE case,
 * *hasoids is set to the required value.
 *
 * One reason this is ugly is that all plan nodes in the plan tree will emit
 * tuples with space for an OID, though we really only need the topmost node
 * to do so.  However, node types like Sort don't project new tuples but just
 * return their inputs, and in those cases the requirement propagates down
 * to the input node.  Eventually we might make this code smart enough to
 * recognize how far down the requirement really goes, but for now we just
 * make all plan nodes do the same thing if the top level forces the choice.
 *
 * We assume that estate->es_result_relation_info is already set up to
 * describe the target relation.  Note that in an UPDATE that spans an
 * inheritance tree, some of the target relations may have OIDs and some not.
 * We have to make the decisions on a per-relation basis as we initialize
 * each of the child plans of the topmost Append plan.
 *
 * SELECT INTO is even uglier, because we don't have the INTO relation's
 * descriptor available when this code runs; we have to look aside at a
 * flag set by InitPlan().
 */
bool
ExecContextForcesOids(PlanState *planstate, bool *hasoids)
{
	if (planstate->state->es_select_into)
	{
		*hasoids = planstate->state->es_into_oids;
		return true;
	}
	else
	{
		ResultRelInfo *ri = planstate->state->es_result_relation_info;

		if (ri != NULL)
		{
			Relation	rel = ri->ri_RelationDesc;

			if (rel != NULL)
			{
				*hasoids = rel->rd_rel->relhasoids;
				return true;
			}
		}
	}

	return false;
}
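
#ifdef NOT_USED
/*
 * Illustrative sketch only, not part of the original file: how a plan
 * node's initialization code might consult ExecContextForcesOids when
 * deciding whether its result tuples need room for an OID column.
 */
static bool
ExampleResultHasOids(PlanState *planstate)
{
	bool		hasoids;

	/* If the surrounding context forces a choice, obey it ... */
	if (ExecContextForcesOids(planstate, &hasoids))
		return hasoids;

	/* ... otherwise we are free to omit the OID column. */
	return false;
}
#endif   /* NOT_USED */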

/* ----------------------------------------------------------------
 *		ExecEndPlan
 *
 *		Cleans up the query plan -- closes files and frees up storage
 *
 * NOTE: we are no longer very worried about freeing storage per se
 * in this code; FreeExecutorState should be guaranteed to release all
 * memory that needs to be released.  What we are worried about doing
 * is closing relations and dropping buffer pins.  Thus, for example,
 * tuple tables must be cleared or dropped to ensure pins are released.
 * ----------------------------------------------------------------
 */
void
ExecEndPlan(PlanState *planstate, EState *estate)
{
	ResultRelInfo *resultRelInfo;
	int			i;
	ListCell   *l;

	/*
	 * shut down any PlanQual processing we were doing
	 */
	if (estate->es_evalPlanQual != NULL)
		EndEvalPlanQual(estate);

	/*
	 * shut down the node-type-specific query processing
	 */
	ExecEndNode(planstate);

	/*
	 * destroy the executor "tuple" table.
	 */
	ExecDropTupleTable(estate->es_tupleTable, true);
	estate->es_tupleTable = NULL;

	/*
	 * close the result relation(s) if any, but hold locks until xact commit.
	 */
	resultRelInfo = estate->es_result_relations;
	for (i = estate->es_num_result_relations; i > 0; i--)
	{
		/* Close indices and then the relation itself */
		ExecCloseIndices(resultRelInfo);
		heap_close(resultRelInfo->ri_RelationDesc, NoLock);
		resultRelInfo++;
	}

	/*
	 * close any relations selected FOR UPDATE/FOR SHARE, again keeping locks
	 */
	foreach(l, estate->es_rowMarks)
	{
		ExecRowMark *erm = lfirst(l);

		heap_close(erm->relation, NoLock);
	}
}

/* ----------------------------------------------------------------
 *		ExecutePlan
 *
 *		processes the query plan to retrieve 'numberTuples' tuples in the
 *		direction specified.
 *
 *		Retrieves all tuples if numberTuples is 0
 *
 *		result is either a slot containing the last tuple in the case
 *		of a SELECT or NULL otherwise.
 *
 * Note: the ctid attribute is a 'junk' attribute that is removed before the
 * user can see it
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
ExecutePlan(EState *estate,
			PlanState *planstate,
			CmdType operation,
			long numberTuples,
			ScanDirection direction,
			DestReceiver *dest)
{
	JunkFilter *junkfilter;
	TupleTableSlot *planSlot;
	TupleTableSlot *slot;
	ItemPointer tupleid = NULL;
	ItemPointerData tuple_ctid;
	long		current_tuple_count;
	TupleTableSlot *result;

	/*
	 * initialize local variables
	 */
	current_tuple_count = 0;
	result = NULL;

	/*
	 * Set the direction.
	 */
	estate->es_direction = direction;

	/*
	 * Process BEFORE EACH STATEMENT triggers
	 */
	switch (operation)
	{
		case CMD_UPDATE:
			ExecBSUpdateTriggers(estate, estate->es_result_relation_info);
			break;
		case CMD_DELETE:
			ExecBSDeleteTriggers(estate, estate->es_result_relation_info);
			break;
		case CMD_INSERT:
			ExecBSInsertTriggers(estate, estate->es_result_relation_info);
			break;
		default:
			/* do nothing */
			break;
	}

	/*
	 * Loop until we've processed the proper number of tuples from the plan.
	 */

	for (;;)
	{
		/* Reset the per-output-tuple exprcontext */
		ResetPerTupleExprContext(estate);

		/*
		 * Execute the plan and obtain a tuple
		 */
lnext:	;
		if (estate->es_useEvalPlan)
		{
			planSlot = EvalPlanQualNext(estate);
			if (TupIsNull(planSlot))
				planSlot = ExecProcNode(planstate);
		}
		else
			planSlot = ExecProcNode(planstate);

		/*
		 * if the tuple is null, then we assume there is nothing more to
		 * process so we just return null...
		 */
		if (TupIsNull(planSlot))
		{
			result = NULL;
			break;
		}
		slot = planSlot;

		/*
		 * if we have a junk filter, then project a new tuple with the junk
		 * removed.
		 *
		 * Store this new "clean" tuple in the junkfilter's resultSlot.
		 * (Formerly, we stored it back over the "dirty" tuple, which is WRONG
		 * because that tuple slot has the wrong descriptor.)
		 *
		 * Also, extract all the junk information we need.
		 */
		if ((junkfilter = estate->es_junkFilter) != NULL)
		{
			Datum		datum;
			bool		isNull;

			/*
			 * extract the 'ctid' junk attribute.
			 */
			if (operation == CMD_UPDATE || operation == CMD_DELETE)
			{
				datum = ExecGetJunkAttribute(slot, junkfilter->jf_junkAttNo,
											 &isNull);
				/* shouldn't ever get a null result... */
				if (isNull)
					elog(ERROR, "ctid is NULL");

				tupleid = (ItemPointer) DatumGetPointer(datum);
				tuple_ctid = *tupleid;	/* make sure we don't free the ctid!! */
				tupleid = &tuple_ctid;
			}

			/*
			 * Process any FOR UPDATE or FOR SHARE locking requested.
			 */
			else if (estate->es_rowMarks != NIL)
			{
				ListCell   *l;

		lmark:	;
				foreach(l, estate->es_rowMarks)
				{
					ExecRowMark *erm = lfirst(l);
					HeapTupleData tuple;
					Buffer		buffer;
					ItemPointerData update_ctid;
					TransactionId update_xmax;
					TupleTableSlot *newSlot;
					LockTupleMode lockmode;
					HTSU_Result test;

					datum = ExecGetJunkAttribute(slot,
												 erm->ctidAttNo,
												 &isNull);
					/* shouldn't ever get a null result... */
					if (isNull)
						elog(ERROR, "ctid is NULL");

					tuple.t_self = *((ItemPointer) DatumGetPointer(datum));

					if (erm->forUpdate)
						lockmode = LockTupleExclusive;
					else
						lockmode = LockTupleShared;

					test = heap_lock_tuple(erm->relation, &tuple, &buffer,
										   &update_ctid, &update_xmax,
										   estate->es_snapshot->curcid,
										   lockmode, erm->noWait);
					ReleaseBuffer(buffer);
					switch (test)
					{
						case HeapTupleSelfUpdated:
							/* treat it as deleted; do not process */
							goto lnext;

						case HeapTupleMayBeUpdated:
							break;

						case HeapTupleUpdated:
							if (IsXactIsoLevelSerializable)
								ereport(ERROR,
								 (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
								  errmsg("could not serialize access due to concurrent update")));
							if (!ItemPointerEquals(&update_ctid,
												   &tuple.t_self))
							{
								/* updated, so look at updated version */
								newSlot = EvalPlanQual(estate,
													   erm->rti,
													   &update_ctid,
													   update_xmax,
												estate->es_snapshot->curcid);
								if (!TupIsNull(newSlot))
								{
									slot = planSlot = newSlot;
									estate->es_useEvalPlan = true;
									goto lmark;
								}
							}

							/*
							 * if tuple was deleted or PlanQual failed for
							 * updated tuple - we must not return this tuple!
							 */
							goto lnext;

						default:
							elog(ERROR, "unrecognized heap_lock_tuple status: %u",
								 test);
							return NULL;
					}
				}
			}

			/*
			 * Create a new "clean" tuple with all junk attributes removed. We
			 * don't need to do this for DELETE, however (there will in fact
			 * be no non-junk attributes in a DELETE!)
			 */
			if (operation != CMD_DELETE)
				slot = ExecFilterJunk(junkfilter, slot);
		}

		/*
		 * now that we have a tuple, do the appropriate thing with it.. either
		 * return it to the user, add it to a relation someplace, delete it
		 * from a relation, or modify some of its attributes.
		 */
		switch (operation)
		{
			case CMD_SELECT:
				ExecSelect(slot, dest, estate);
				result = slot;
				break;

			case CMD_INSERT:
				ExecInsert(slot, tupleid, planSlot, dest, estate);
				result = NULL;
				break;

			case CMD_DELETE:
				ExecDelete(tupleid, planSlot, dest, estate);
				result = NULL;
				break;

			case CMD_UPDATE:
				ExecUpdate(slot, tupleid, planSlot, dest, estate);
				result = NULL;
				break;

			default:
				elog(ERROR, "unrecognized operation code: %d",
					 (int) operation);
				result = NULL;
				break;
		}

		/*
		 * check our tuple count.. if we've processed the proper number then
		 * quit, else loop again and process more tuples.  Zero numberTuples
		 * means no limit.
		 */
		current_tuple_count++;
		if (numberTuples && numberTuples == current_tuple_count)
			break;
	}

	/*
	 * Process AFTER EACH STATEMENT triggers
	 */
	switch (operation)
	{
		case CMD_UPDATE:
			ExecASUpdateTriggers(estate, estate->es_result_relation_info);
			break;
		case CMD_DELETE:
			ExecASDeleteTriggers(estate, estate->es_result_relation_info);
			break;
		case CMD_INSERT:
			ExecASInsertTriggers(estate, estate->es_result_relation_info);
			break;
		default:
			/* do nothing */
			break;
	}

	/*
	 * here, result is either a slot containing a tuple in the case of a
	 * SELECT or NULL otherwise.
	 */
	return result;
}

/* ----------------------------------------------------------------
 *		ExecSelect
 *
 *		SELECTs are easy.. we just pass the tuple to the appropriate
 *		output function.
 * ----------------------------------------------------------------
 */
static void
ExecSelect(TupleTableSlot *slot,
		   DestReceiver *dest,
		   EState *estate)
{
	(*dest->receiveSlot) (slot, dest);
	IncrRetrieved();
	(estate->es_processed)++;
}

/* ----------------------------------------------------------------
 *		ExecInsert
 *
 *		INSERTs are trickier.. we have to insert the tuple into
 *		the base relation and insert appropriate tuples into the
 *		index relations.
 * ----------------------------------------------------------------
 */
static void
ExecInsert(TupleTableSlot *slot,
		   ItemPointer tupleid,
		   TupleTableSlot *planSlot,
		   DestReceiver *dest,
		   EState *estate)
{
	HeapTuple	tuple;
	ResultRelInfo *resultRelInfo;
	Relation	resultRelationDesc;
	Oid			newId;

	/*
	 * get the heap tuple out of the tuple table slot, making sure we have a
	 * writable copy
	 */
	tuple = ExecMaterializeSlot(slot);

	/*
	 * get information on the (current) result relation
	 */
	resultRelInfo = estate->es_result_relation_info;
	resultRelationDesc = resultRelInfo->ri_RelationDesc;

	/* BEFORE ROW INSERT Triggers */
	if (resultRelInfo->ri_TrigDesc &&
		resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_INSERT] > 0)
	{
		HeapTuple	newtuple;

		newtuple = ExecBRInsertTriggers(estate, resultRelInfo, tuple);

		if (newtuple == NULL)	/* "do nothing" */
			return;

		if (newtuple != tuple)	/* modified by Trigger(s) */
		{
			/*
			 * Put the modified tuple into a slot for convenience of routines
			 * below.  We assume the tuple was allocated in per-tuple memory
			 * context, and therefore will go away by itself. The tuple table
			 * slot should not try to clear it.
			 */
			TupleTableSlot *newslot = estate->es_trig_tuple_slot;

			if (newslot->tts_tupleDescriptor != slot->tts_tupleDescriptor)
				ExecSetSlotDescriptor(newslot, slot->tts_tupleDescriptor);
			ExecStoreTuple(newtuple, newslot, InvalidBuffer, false);
			slot = newslot;
			tuple = newtuple;
		}
	}

	/*
	 * Check the constraints of the tuple
	 */
	if (resultRelationDesc->rd_att->constr)
		ExecConstraints(resultRelInfo, slot, estate);

	/*
	 * insert the tuple
	 *
	 * Note: heap_insert returns the tid (location) of the new tuple in the
	 * t_self field.
	 */
	newId = heap_insert(resultRelationDesc, tuple,
						estate->es_snapshot->curcid,
						true, true);

	IncrAppended();
	(estate->es_processed)++;
	estate->es_lastoid = newId;
	setLastTid(&(tuple->t_self));

	/*
	 * insert index entries for tuple
	 */
	if (resultRelInfo->ri_NumIndices > 0)
		ExecInsertIndexTuples(slot, &(tuple->t_self), estate, false);

	/* AFTER ROW INSERT Triggers */
	ExecARInsertTriggers(estate, resultRelInfo, tuple);

	/* Process RETURNING if present */
	if (resultRelInfo->ri_projectReturning)
		ExecProcessReturning(resultRelInfo->ri_projectReturning,
							 slot, planSlot, dest);
}

/* ----------------------------------------------------------------
 *		ExecDelete
 *
 *		DELETE is like UPDATE, except that we delete the tuple and no
 *		index modifications are needed
 * ----------------------------------------------------------------
 */
static void
ExecDelete(ItemPointer tupleid,
		   TupleTableSlot *planSlot,
		   DestReceiver *dest,
		   EState *estate)
{
	ResultRelInfo *resultRelInfo;
	Relation	resultRelationDesc;
	HTSU_Result result;
	ItemPointerData update_ctid;
	TransactionId update_xmax;

	/*
	 * get information on the (current) result relation
	 */
	resultRelInfo = estate->es_result_relation_info;
	resultRelationDesc = resultRelInfo->ri_RelationDesc;

	/* BEFORE ROW DELETE Triggers */
	if (resultRelInfo->ri_TrigDesc &&
		resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_DELETE] > 0)
	{
		bool		dodelete;

		dodelete = ExecBRDeleteTriggers(estate, resultRelInfo, tupleid,
										estate->es_snapshot->curcid);

		if (!dodelete)			/* "do nothing" */
			return;
	}

	/*
	 * delete the tuple
	 *
	 * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check that
	 * the row to be deleted is visible to that snapshot, and throw a can't-
	 * serialize error if not.	This is a special-case behavior needed for
	 * referential integrity updates in serializable transactions.
	 */
ldelete:;
	result = heap_delete(resultRelationDesc, tupleid,
						 &update_ctid, &update_xmax,
						 estate->es_snapshot->curcid,
						 estate->es_crosscheck_snapshot,
						 true /* wait for commit */ );
	switch (result)
	{
		case HeapTupleSelfUpdated:
			/* already deleted by self; nothing to do */
			return;

		case HeapTupleMayBeUpdated:
			break;

		case HeapTupleUpdated:
			if (IsXactIsoLevelSerializable)
				ereport(ERROR,
						(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
						 errmsg("could not serialize access due to concurrent update")));
			else if (!ItemPointerEquals(tupleid, &update_ctid))
			{
				TupleTableSlot *epqslot;

				epqslot = EvalPlanQual(estate,
									   resultRelInfo->ri_RangeTableIndex,
									   &update_ctid,
									   update_xmax,
									   estate->es_snapshot->curcid);
				if (!TupIsNull(epqslot))
				{
					*tupleid = update_ctid;
					goto ldelete;
				}
			}
			/* tuple already deleted; nothing to do */
			return;

		default:
			elog(ERROR, "unrecognized heap_delete status: %u", result);
			return;
	}

	IncrDeleted();
	(estate->es_processed)++;

	/*
	 * Note: Normally one would think that we have to delete index tuples
	 * associated with the heap tuple now...
	 *
	 * ... but in POSTGRES, we have no need to do this because VACUUM will
	 * take care of it later.  We can't delete index tuples immediately
	 * anyway, since the tuple is still visible to other transactions.
	 */

	/* AFTER ROW DELETE Triggers */
	ExecARDeleteTriggers(estate, resultRelInfo, tupleid);

	/* Process RETURNING if present */
	if (resultRelInfo->ri_projectReturning)
	{
		/*
		 * We have to put the target tuple into a slot, which means first we
		 * gotta fetch it.	We can use the trigger tuple slot.
		 */
		TupleTableSlot *slot = estate->es_trig_tuple_slot;
		HeapTupleData deltuple;
		Buffer		delbuffer;

		deltuple.t_self = *tupleid;
		if (!heap_fetch(resultRelationDesc, SnapshotAny,
						&deltuple, &delbuffer, false, NULL))
			elog(ERROR, "failed to fetch deleted tuple for DELETE RETURNING");

		if (slot->tts_tupleDescriptor != RelationGetDescr(resultRelationDesc))
			ExecSetSlotDescriptor(slot, RelationGetDescr(resultRelationDesc));
		ExecStoreTuple(&deltuple, slot, InvalidBuffer, false);

		ExecProcessReturning(resultRelInfo->ri_projectReturning,
							 slot, planSlot, dest);

		ExecClearTuple(slot);
		ReleaseBuffer(delbuffer);
	}
}

/* ----------------------------------------------------------------
1557
 *		ExecUpdate
1558
 *
1559 1560 1561 1562
 *		note: we can't run UPDATE queries with transactions
 *		off because UPDATEs are actually INSERTs and our
 *		scan will mistakenly loop forever, updating the tuple
 *		it just inserted..	This should be fixed but until it
1563 1564
 *		is, we don't want to get stuck in an infinite loop
 *		which corrupts your database..
1565 1566 1567
 * ----------------------------------------------------------------
 */
static void
1568
ExecUpdate(TupleTableSlot *slot,
B
Bruce Momjian 已提交
1569
		   ItemPointer tupleid,
1570 1571
		   TupleTableSlot *planSlot,
		   DestReceiver *dest,
B
Bruce Momjian 已提交
1572
		   EState *estate)
1573
{
B
Bruce Momjian 已提交
1574
	HeapTuple	tuple;
1575
	ResultRelInfo *resultRelInfo;
B
Bruce Momjian 已提交
1576
	Relation	resultRelationDesc;
B
Bruce Momjian 已提交
1577
	HTSU_Result result;
1578 1579
	ItemPointerData update_ctid;
	TransactionId update_xmax;
1580

B
	/*
	 * abort the operation if transactions are not running (as in bootstrap
	 * processing mode)
	 */
	if (IsBootstrapProcessingMode())
		elog(ERROR, "cannot UPDATE during bootstrap");

	/*
	 * get the heap tuple out of the tuple table slot, making sure we have a
	 * writable copy
	 */
	tuple = ExecMaterializeSlot(slot);

	/*
	 * get information on the (current) result relation
	 */
	resultRelInfo = estate->es_result_relation_info;
	resultRelationDesc = resultRelInfo->ri_RelationDesc;

	/* BEFORE ROW UPDATE Triggers */
	if (resultRelInfo->ri_TrigDesc &&
		resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_UPDATE] > 0)
	{
		HeapTuple	newtuple;

		newtuple = ExecBRUpdateTriggers(estate, resultRelInfo,
										tupleid, tuple,
										estate->es_snapshot->curcid);

		if (newtuple == NULL)	/* "do nothing" */
			return;

		if (newtuple != tuple)	/* modified by Trigger(s) */
		{
			/*
			 * Put the modified tuple into a slot for convenience of routines
			 * below.  We assume the tuple was allocated in per-tuple memory
			 * context, and therefore will go away by itself. The tuple table
			 * slot should not try to clear it.
			 */
			TupleTableSlot *newslot = estate->es_trig_tuple_slot;

			if (newslot->tts_tupleDescriptor != slot->tts_tupleDescriptor)
				ExecSetSlotDescriptor(newslot, slot->tts_tupleDescriptor);
			ExecStoreTuple(newtuple, newslot, InvalidBuffer, false);
			slot = newslot;
			tuple = newtuple;
		}
	}

	/*
	 * Check the constraints of the tuple
	 *
	 * If we generate a new candidate tuple after EvalPlanQual testing, we
	 * must loop back here and recheck constraints.  (We don't need to redo
	 * triggers, however.  If there are any BEFORE triggers then trigger.c
	 * will have done heap_lock_tuple to lock the correct tuple, so there's no
	 * need to do them again.)
	 */
lreplace:;
	if (resultRelationDesc->rd_att->constr)
		ExecConstraints(resultRelInfo, slot, estate);

	/*
	 * replace the heap tuple
	 *
	 * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check that
	 * the row to be updated is visible to that snapshot, and throw a
	 * can't-serialize error if not.  This is a special-case behavior needed
	 * for referential integrity updates in serializable transactions.
	 */
	result = heap_update(resultRelationDesc, tupleid, tuple,
						 &update_ctid, &update_xmax,
						 estate->es_snapshot->curcid,
						 estate->es_crosscheck_snapshot,
						 true /* wait for commit */ );
	switch (result)
	{
		case HeapTupleSelfUpdated:
			/* already deleted by self; nothing to do */
			return;

		case HeapTupleMayBeUpdated:
			break;

		case HeapTupleUpdated:
			if (IsXactIsoLevelSerializable)
				ereport(ERROR,
						(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
						 errmsg("could not serialize access due to concurrent update")));
			else if (!ItemPointerEquals(tupleid, &update_ctid))
			{
				TupleTableSlot *epqslot;

				epqslot = EvalPlanQual(estate,
									   resultRelInfo->ri_RangeTableIndex,
									   &update_ctid,
									   update_xmax,
									   estate->es_snapshot->curcid);
				if (!TupIsNull(epqslot))
				{
					*tupleid = update_ctid;
					slot = ExecFilterJunk(estate->es_junkFilter, epqslot);
					tuple = ExecMaterializeSlot(slot);
					goto lreplace;
				}
			}
			/* tuple already deleted; nothing to do */
			return;

		default:
			elog(ERROR, "unrecognized heap_update status: %u", result);
			return;
	}

	IncrReplaced();
	(estate->es_processed)++;

	/*
	 * Note: instead of having to update the old index tuples associated with
	 * the heap tuple, all we do is form and insert new index tuples. This is
	 * because UPDATEs are actually DELETEs and INSERTs, and index tuple
	 * deletion is done later by VACUUM (see notes in ExecDelete).  All we do
	 * here is insert new index tuples.  -cim 9/27/89
	 */

	/*
	 * insert index entries for tuple
	 *
	 * Note: heap_update returns the tid (location) of the new tuple in the
	 * t_self field.
	 */
	if (resultRelInfo->ri_NumIndices > 0)
		ExecInsertIndexTuples(slot, &(tuple->t_self), estate, false);

	/* AFTER ROW UPDATE Triggers */
	ExecARUpdateTriggers(estate, resultRelInfo, tupleid, tuple);

	/* Process RETURNING if present */
	if (resultRelInfo->ri_projectReturning)
		ExecProcessReturning(resultRelInfo->ri_projectReturning,
							 slot, planSlot, dest);
}
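
/*
 * Illustrative sketch (added commentary, not part of the original file):
 * the lreplace/EvalPlanQual dance above is what implements READ COMMITTED
 * update rechecking at the SQL level.  Assuming a hypothetical table
 * "accounts", suppose session 1 runs
 *
 *		UPDATE accounts SET bal = bal - 100 WHERE id = 1;
 *
 * and, before it commits, session 2 runs an UPDATE against the same row.
 * Session 2's heap_update blocks waiting on session 1; once session 1
 * commits, heap_update returns HeapTupleUpdated, EvalPlanQual re-evaluates
 * the query's quals against the newest row version, and if they still pass
 * we goto lreplace and retry the update (rechecking constraints) against
 * that version.
 */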
/*
 * ExecRelCheck --- check that tuple meets constraints for result relation
 */
static const char *
ExecRelCheck(ResultRelInfo *resultRelInfo,
			 TupleTableSlot *slot, EState *estate)
{
	Relation	rel = resultRelInfo->ri_RelationDesc;
	int			ncheck = rel->rd_att->constr->num_check;
	ConstrCheck *check = rel->rd_att->constr->check;
	ExprContext *econtext;
	MemoryContext oldContext;
	List	   *qual;
	int			i;

	/*
	 * If first time through for this result relation, build expression
	 * node trees for rel's constraint expressions.  Keep them in the
	 * per-query memory context so they'll survive throughout the query.
	 */
	if (resultRelInfo->ri_ConstraintExprs == NULL)
	{
		oldContext = MemoryContextSwitchTo(estate->es_query_cxt);
		resultRelInfo->ri_ConstraintExprs =
			(List **) palloc(ncheck * sizeof(List *));
		for (i = 0; i < ncheck; i++)
		{
			/* ExecQual wants implicit-AND form */
			qual = make_ands_implicit(stringToNode(check[i].ccbin));
			resultRelInfo->ri_ConstraintExprs[i] = (List *)
				ExecPrepareExpr((Expr *) qual, estate);
		}
		MemoryContextSwitchTo(oldContext);
	}

	/*
	 * We will use the EState's per-tuple context for evaluating constraint
	 * expressions (creating it if it's not already there).
	 */
	econtext = GetPerTupleExprContext(estate);

	/* Arrange for econtext's scan tuple to be the tuple under test */
	econtext->ecxt_scantuple = slot;

	/* And evaluate the constraints */
	for (i = 0; i < ncheck; i++)
	{
		qual = resultRelInfo->ri_ConstraintExprs[i];

		/*
		 * NOTE: SQL92 specifies that a NULL result from a constraint
		 * expression is not to be treated as a failure.  Therefore, tell
		 * ExecQual to return TRUE for NULL.
		 */
		if (!ExecQual(qual, econtext, true))
			return check[i].ccname;
	}

	/* NULL result means no error */
	return NULL;
}
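
/*
 * Example (a hedged sketch, not from the original source): for a table
 * declared with a hypothetical constraint
 *
 *		CHECK (a > 0 AND b > 0)
 *
 * make_ands_implicit() flattens the stored expression into the implicit-AND
 * list {a > 0, b > 0}, and ExecQual(qual, econtext, true) treats a NULL
 * result from any list item as TRUE, per the SQL-spec rule that an unknown
 * CHECK result is not a violation.
 */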

void
ExecConstraints(ResultRelInfo *resultRelInfo,
				TupleTableSlot *slot, EState *estate)
{
	Relation	rel = resultRelInfo->ri_RelationDesc;
	TupleConstr *constr = rel->rd_att->constr;

	Assert(constr);

	if (constr->has_not_null)
	{
		int			natts = rel->rd_att->natts;
		int			attrChk;

		for (attrChk = 1; attrChk <= natts; attrChk++)
		{
			if (rel->rd_att->attrs[attrChk - 1]->attnotnull &&
				slot_attisnull(slot, attrChk))
				ereport(ERROR,
						(errcode(ERRCODE_NOT_NULL_VIOLATION),
						 errmsg("null value in column \"%s\" violates not-null constraint",
						NameStr(rel->rd_att->attrs[attrChk - 1]->attname))));
		}
	}

	if (constr->num_check > 0)
	{
		const char *failed;

		if ((failed = ExecRelCheck(resultRelInfo, slot, estate)) != NULL)
			ereport(ERROR,
					(errcode(ERRCODE_CHECK_VIOLATION),
					 errmsg("new row for relation \"%s\" violates check constraint \"%s\"",
							RelationGetRelationName(rel), failed)));
	}
}
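
/*
 * Behavior sketch (illustrative only, assuming a hypothetical table "t"
 * with a NOT NULL column "x" and a check constraint named "t_x_check"):
 *
 *		INSERT INTO t(x) VALUES (NULL);
 *		-- ERROR:  null value in column "x" violates not-null constraint
 *
 *		INSERT INTO t(x) VALUES (-1);
 *		-- ERROR:  new row for relation "t" violates check constraint
 *		--         "t_x_check"
 */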
/*
 * ExecProcessReturning --- evaluate a RETURNING list and send to dest
 *
 * projectReturning: RETURNING projection info for current result rel
 * tupleSlot: slot holding tuple actually inserted/updated/deleted
 * planSlot: slot holding tuple returned by top plan node
 * dest: where to send the output
 */
static void
ExecProcessReturning(ProjectionInfo *projectReturning,
					 TupleTableSlot *tupleSlot,
					 TupleTableSlot *planSlot,
					 DestReceiver *dest)
{
	ExprContext *econtext = projectReturning->pi_exprContext;
	TupleTableSlot *retSlot;

	/*
	 * Reset per-tuple memory context to free any expression evaluation
	 * storage allocated in the previous cycle.
	 */
	ResetExprContext(econtext);

	/* Make tuple and any needed join variables available to ExecProject */
	econtext->ecxt_scantuple = tupleSlot;
	econtext->ecxt_outertuple = planSlot;

	/* Compute the RETURNING expressions */
	retSlot = ExecProject(projectReturning, NULL);

	/* Send to dest */
	(*dest->receiveSlot) (retSlot, dest);

	ExecClearTuple(retSlot);
}
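
/*
 * Usage sketch (illustrative, not from the original file): for a statement
 * such as
 *
 *		UPDATE foo SET val = val + 1 WHERE id < 10 RETURNING id, val;
 *
 * ExecUpdate calls this routine once per modified row; the RETURNING
 * expressions are projected from the new tuple (ecxt_scantuple) plus any
 * other plan output columns (ecxt_outertuple) and shipped to the query's
 * DestReceiver just like an ordinary SELECT result row.
 */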

/*
 * Check a modified tuple to see if we want to process its updated version
 * under READ COMMITTED rules.
 *
 * See backend/executor/README for some info about how this works.
 *
 *	estate - executor state data
 *	rti - rangetable index of table containing tuple
 *	*tid - t_ctid from the outdated tuple (ie, next updated version)
 *	priorXmax - t_xmax from the outdated tuple
 *	curCid - command ID of current command of my transaction
 *
 * *tid is also an output parameter: it's modified to hold the TID of the
 * latest version of the tuple (note this may be changed even on failure)
 *
 * Returns a slot containing the new candidate update/delete tuple, or
 * NULL if we determine we shouldn't process the row.
 */
TupleTableSlot *
EvalPlanQual(EState *estate, Index rti,
			 ItemPointer tid, TransactionId priorXmax, CommandId curCid)
{
	evalPlanQual *epq;
	EState	   *epqstate;
	Relation	relation;
	HeapTupleData tuple;
	HeapTuple	copyTuple = NULL;
	bool		endNode;

	Assert(rti != 0);

	/*
	 * find relation containing target tuple
	 */
	if (estate->es_result_relation_info != NULL &&
		estate->es_result_relation_info->ri_RangeTableIndex == rti)
		relation = estate->es_result_relation_info->ri_RelationDesc;
	else
	{
		ListCell   *l;

		relation = NULL;
		foreach(l, estate->es_rowMarks)
		{
			if (((ExecRowMark *) lfirst(l))->rti == rti)
			{
				relation = ((ExecRowMark *) lfirst(l))->relation;
				break;
			}
		}
		if (relation == NULL)
			elog(ERROR, "could not find RowMark for RT index %u", rti);
	}

	/*
	 * fetch tid tuple
	 *
	 * Loop here to deal with updated or busy tuples
	 */
	tuple.t_self = *tid;
	for (;;)
	{
		Buffer		buffer;

		if (heap_fetch(relation, SnapshotDirty, &tuple, &buffer, true, NULL))
		{
			/*
			 * If xmin isn't what we're expecting, the slot must have been
			 * recycled and reused for an unrelated tuple.  This implies that
			 * the latest version of the row was deleted, so we need do
			 * nothing.  (Should be safe to examine xmin without getting
			 * buffer's content lock, since xmin never changes in an existing
			 * tuple.)
			 */
			if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
									 priorXmax))
			{
				ReleaseBuffer(buffer);
				return NULL;
			}

			/* otherwise xmin should not be dirty... */
			if (TransactionIdIsValid(SnapshotDirty->xmin))
				elog(ERROR, "t_xmin is uncommitted in tuple to be updated");

			/*
			 * If tuple is being updated by other transaction then we have to
			 * wait for its commit/abort.
			 */
			if (TransactionIdIsValid(SnapshotDirty->xmax))
			{
				ReleaseBuffer(buffer);
				XactLockTableWait(SnapshotDirty->xmax);
				continue;		/* loop back to repeat heap_fetch */
			}

			/*
			 * If tuple was inserted by our own transaction, we have to check
			 * cmin against curCid: cmin >= curCid means our command cannot
			 * see the tuple, so we should ignore it.  Without this we are
			 * open to the "Halloween problem" of indefinitely re-updating the
			 * same tuple.  (We need not check cmax because
			 * HeapTupleSatisfiesDirty will consider a tuple deleted by our
			 * transaction dead, regardless of cmax.)  We just checked that
			 * priorXmax == xmin, so we can test that variable instead of
			 * doing HeapTupleHeaderGetXmin again.
			 */
			if (TransactionIdIsCurrentTransactionId(priorXmax) &&
				HeapTupleHeaderGetCmin(tuple.t_data) >= curCid)
			{
				ReleaseBuffer(buffer);
				return NULL;
			}

			/*
			 * We got tuple - now copy it for use by recheck query.
			 */
			copyTuple = heap_copytuple(&tuple);
			ReleaseBuffer(buffer);
			break;
		}

		/*
		 * If the referenced slot was actually empty, the latest version of
		 * the row must have been deleted, so we need do nothing.
		 */
		if (tuple.t_data == NULL)
		{
			ReleaseBuffer(buffer);
			return NULL;
		}

		/*
		 * As above, if xmin isn't what we're expecting, do nothing.
		 */
		if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
								 priorXmax))
		{
			ReleaseBuffer(buffer);
			return NULL;
		}

		/*
		 * If we get here, the tuple was found but failed SnapshotDirty.
		 * Assuming the xmin is either a committed xact or our own xact (as it
		 * certainly should be if we're trying to modify the tuple), this must
		 * mean that the row was updated or deleted by either a committed xact
		 * or our own xact.  If it was deleted, we can ignore it; if it was
		 * updated then chain up to the next version and repeat the whole
		 * test.
		 *
		 * As above, it should be safe to examine xmax and t_ctid without the
		 * buffer content lock, because they can't be changing.
		 */
		if (ItemPointerEquals(&tuple.t_self, &tuple.t_data->t_ctid))
		{
			/* deleted, so forget about it */
			ReleaseBuffer(buffer);
			return NULL;
		}

		/* updated, so look at the updated row */
		tuple.t_self = tuple.t_data->t_ctid;
		/* updated row should have xmin matching this xmax */
		priorXmax = HeapTupleHeaderGetXmax(tuple.t_data);
		ReleaseBuffer(buffer);
		/* loop back to fetch next in chain */
	}

	/*
	 * For UPDATE/DELETE we have to return tid of actual row we're executing
	 * PQ for.
	 */
	*tid = tuple.t_self;

	/*
	 * Need to run a recheck subquery.  Find or create a PQ stack entry.
	 */
	epq = estate->es_evalPlanQual;
	endNode = true;

	if (epq != NULL && epq->rti == 0)
	{
		/* Top PQ stack entry is idle, so re-use it */
		Assert(!(estate->es_useEvalPlan) && epq->next == NULL);
		epq->rti = rti;
		endNode = false;
	}

	/*
	 * If this is a request for another RTE, Ra, we must check whether
	 * PlanQual was already requested for Ra.  If so, Ra's row was updated
	 * again, and we have to restart the old execution for Ra, discarding
	 * everything done since Ra was suspended.
	 */
	if (epq != NULL && epq->rti != rti &&
		epq->estate->es_evTuple[rti - 1] != NULL)
	{
		do
		{
			evalPlanQual *oldepq;

			/* stop execution */
			EvalPlanQualStop(epq);
			/* pop previous PlanQual from the stack */
			oldepq = epq->next;
			Assert(oldepq && oldepq->rti != 0);
			/* push current PQ to freePQ stack */
			oldepq->free = epq;
			epq = oldepq;
			estate->es_evalPlanQual = epq;
		} while (epq->rti != rti);
	}

	/*
	 * If we are requested for another RTE, we have to suspend execution of
	 * the current PlanQual and start execution for the new one.
	 */
	if (epq == NULL || epq->rti != rti)
	{
		/* try to reuse plan used previously */
		evalPlanQual *newepq = (epq != NULL) ? epq->free : NULL;

		if (newepq == NULL)		/* first call or freePQ stack is empty */
		{
			newepq = (evalPlanQual *) palloc0(sizeof(evalPlanQual));
			newepq->free = NULL;
			newepq->estate = NULL;
			newepq->planstate = NULL;
		}
		else
		{
			/* recycle previously used PlanQual */
			Assert(newepq->estate == NULL);
			epq->free = NULL;
		}
		/* push current PQ to the stack */
		newepq->next = epq;
		epq = newepq;
		estate->es_evalPlanQual = epq;
		epq->rti = rti;
		endNode = false;
	}

	Assert(epq->rti == rti);

	/*
	 * Ok - we're requested for the same RTE.  Unfortunately we still have to
	 * end and restart execution of the plan, because ExecReScan wouldn't
	 * ensure that upper plan nodes would reset themselves.  We could make
	 * that work if insertion of the target tuple were integrated with the
	 * Param mechanism somehow, so that the upper plan nodes know that their
	 * children's outputs have changed.
	 *
	 * Note that the stack of free evalPlanQual nodes is quite useless at the
	 * moment, since it only saves us from pallocing/releasing the
	 * evalPlanQual nodes themselves.  But it will be useful once we implement
	 * ReScan instead of end/restart for re-using PlanQual nodes.
	 */
	if (endNode)
	{
		/* stop execution */
		EvalPlanQualStop(epq);
	}

	/*
	 * Initialize new recheck query.
	 *
	 * Note: if we were re-using PlanQual plans via ExecReScan, we'd need to
	 * instead copy down changeable state from the top plan (including
	 * es_result_relation_info, es_junkFilter) and reset locally changeable
	 * state in the epq (including es_param_exec_vals, es_evTupleNull).
	 */
	EvalPlanQualStart(epq, estate, epq->next);

	/*
	 * free old RTE's tuple, if any, and store target tuple where relation's
	 * scan node will see it
	 */
	epqstate = epq->estate;
	if (epqstate->es_evTuple[rti - 1] != NULL)
		heap_freetuple(epqstate->es_evTuple[rti - 1]);
	epqstate->es_evTuple[rti - 1] = copyTuple;

	return EvalPlanQualNext(estate);
}
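
/*
 * Worked example of the chain-following loop above (a sketch, not from the
 * original file): if the row we meant to update has meanwhile been updated
 * twice by committed transactions, the heap holds a version chain
 *
 *		v1 --t_ctid--> v2 --t_ctid--> v3
 *
 * and we arrive with *tid pointing at v2 and priorXmax = xmax(v1).  v2
 * fails SnapshotDirty (its updater has committed), so we verify that
 * xmin(v2) == priorXmax, advance to v3 via t_ctid, set
 * priorXmax = xmax(v2), and loop; v3 is the live version that gets copied
 * into es_evTuple for the recheck plan to examine.
 */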

static TupleTableSlot *
EvalPlanQualNext(EState *estate)
{
	evalPlanQual *epq = estate->es_evalPlanQual;
	MemoryContext oldcontext;
	TupleTableSlot *slot;

	Assert(epq->rti != 0);

lpqnext:;
	oldcontext = MemoryContextSwitchTo(epq->estate->es_query_cxt);
	slot = ExecProcNode(epq->planstate);
	MemoryContextSwitchTo(oldcontext);

	/*
	 * No more tuples for this PQ. Continue previous one.
	 */
	if (TupIsNull(slot))
	{
		evalPlanQual *oldepq;

		/* stop execution */
		EvalPlanQualStop(epq);
		/* pop old PQ from the stack */
		oldepq = epq->next;
		if (oldepq == NULL)
		{
			/* this is the first (oldest) PQ - mark as free */
			epq->rti = 0;
			estate->es_useEvalPlan = false;
			/* and continue Query execution */
			return NULL;
		}
		Assert(oldepq->rti != 0);
		/* push current PQ to freePQ stack */
		oldepq->free = epq;
		epq = oldepq;
		estate->es_evalPlanQual = epq;
		goto lpqnext;
	}

	return slot;
}

static void
EndEvalPlanQual(EState *estate)
{
	evalPlanQual *epq = estate->es_evalPlanQual;

	if (epq->rti == 0)			/* plans already shut down */
	{
		Assert(epq->next == NULL);
		return;
	}

	for (;;)
	{
		evalPlanQual *oldepq;

		/* stop execution */
		EvalPlanQualStop(epq);
		/* pop old PQ from the stack */
		oldepq = epq->next;
		if (oldepq == NULL)
		{
			/* this is the first (oldest) PQ - mark as free */
			epq->rti = 0;
			estate->es_useEvalPlan = false;
			break;
		}
		Assert(oldepq->rti != 0);
		/* push current PQ to freePQ stack */
		oldepq->free = epq;
		epq = oldepq;
		estate->es_evalPlanQual = epq;
	}
}

/*
 * Start execution of one level of PlanQual.
 *
 * This is a cut-down version of ExecutorStart(): we copy some state from
 * the top-level estate rather than initializing it fresh.
 */
static void
EvalPlanQualStart(evalPlanQual *epq, EState *estate, evalPlanQual *priorepq)
{
	EState	   *epqstate;
	int			rtsize;
	MemoryContext oldcontext;

	rtsize = list_length(estate->es_range_table);

	/*
	 * It's tempting to think about using CreateSubExecutorState here, but
	 * at present we can't because of memory leakage concerns ...
	 */
	epq->estate = epqstate = CreateExecutorState();

	oldcontext = MemoryContextSwitchTo(epqstate->es_query_cxt);

	/*
	 * The epqstates share the top query's copy of unchanging state such as
	 * the snapshot, rangetable, result-rel info, and external Param info.
	 * They need their own copies of local state, including a tuple table,
	 * es_param_exec_vals, etc.
	 */
	epqstate->es_direction = ForwardScanDirection;
	epqstate->es_snapshot = estate->es_snapshot;
	epqstate->es_crosscheck_snapshot = estate->es_crosscheck_snapshot;
	epqstate->es_range_table = estate->es_range_table;
	epqstate->es_result_relations = estate->es_result_relations;
	epqstate->es_num_result_relations = estate->es_num_result_relations;
	epqstate->es_result_relation_info = estate->es_result_relation_info;
	epqstate->es_junkFilter = estate->es_junkFilter;
	epqstate->es_into_relation_descriptor = estate->es_into_relation_descriptor;
	epqstate->es_into_relation_use_wal = estate->es_into_relation_use_wal;
	epqstate->es_param_list_info = estate->es_param_list_info;
	if (estate->es_plannedstmt->nParamExec > 0)
		epqstate->es_param_exec_vals = (ParamExecData *)
			palloc0(estate->es_plannedstmt->nParamExec * sizeof(ParamExecData));
	epqstate->es_rowMarks = estate->es_rowMarks;
	epqstate->es_instrument = estate->es_instrument;
	epqstate->es_select_into = estate->es_select_into;
	epqstate->es_into_oids = estate->es_into_oids;
	epqstate->es_plannedstmt = estate->es_plannedstmt;

	/*
	 * Each epqstate must have its own es_evTupleNull state, but all the stack
	 * entries share es_evTuple state.  This allows sub-rechecks to inherit
	 * the value being examined by an outer recheck.
	 */
	epqstate->es_evTupleNull = (bool *) palloc0(rtsize * sizeof(bool));
	if (priorepq == NULL)
		/* first PQ stack entry */
		epqstate->es_evTuple = (HeapTuple *)
			palloc0(rtsize * sizeof(HeapTuple));
	else
		/* later stack entries share the same storage */
		epqstate->es_evTuple = priorepq->estate->es_evTuple;

	epqstate->es_tupleTable =
		ExecCreateTupleTable(estate->es_tupleTable->size);

	epq->planstate = ExecInitNode(estate->es_plannedstmt->planTree, epqstate, 0);

	MemoryContextSwitchTo(oldcontext);
}
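
/*
 * Note (an added explanatory sketch): sharing es_evTuple across stack
 * entries means that if an outer recheck is examining a candidate row for
 * RTE 1 and a nested recheck is then started for RTE 2, the inner plan
 * still sees the outer candidate row when it scans RTE 1, rather than
 * rescanning the table.  Only es_evTupleNull is private, since each
 * recheck level tracks for itself whether the test tuple has been
 * returned yet.
 */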

/*
 * End execution of one level of PlanQual.
 *
 * This is a cut-down version of ExecutorEnd(); basically we want to do most
 * of the normal cleanup, but *not* close result relations (which we are
 * just sharing from the outer query).
 */
static void
EvalPlanQualStop(evalPlanQual *epq)
{
	EState	   *epqstate = epq->estate;
	MemoryContext oldcontext;

	oldcontext = MemoryContextSwitchTo(epqstate->es_query_cxt);

	ExecEndNode(epq->planstate);

	ExecDropTupleTable(epqstate->es_tupleTable, true);
	epqstate->es_tupleTable = NULL;

	if (epqstate->es_evTuple[epq->rti - 1] != NULL)
	{
		heap_freetuple(epqstate->es_evTuple[epq->rti - 1]);
		epqstate->es_evTuple[epq->rti - 1] = NULL;
	}

	MemoryContextSwitchTo(oldcontext);

	FreeExecutorState(epqstate);

	epq->estate = NULL;
	epq->planstate = NULL;
}


/*
 * Support for SELECT INTO (a/k/a CREATE TABLE AS)
 *
 * We implement SELECT INTO by diverting SELECT's normal output with
 * a specialized DestReceiver type.
 *
 * TODO: remove some of the INTO-specific cruft from EState, and keep
 * it in the DestReceiver instead.
 */

typedef struct
{
	DestReceiver pub;			/* publicly-known function pointers */
	EState	   *estate;			/* EState we are working with */
} DR_intorel;
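
/*
 * Illustrative note (added commentary, not in the original file): with
 * this receiver installed, a statement like
 *
 *		SELECT oid, relname INTO my_copy FROM pg_class;
 *
 * (where "my_copy" is a hypothetical target table) runs the plan exactly
 * as a plain SELECT would, except that each result slot is handed to
 * intorel_receive(), which heap_insert()s it into the newly created
 * relation instead of sending it to the client.
 */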

/*
 * OpenIntoRel --- actually create the SELECT INTO target relation
 *
 * This also replaces QueryDesc->dest with the special DestReceiver for
 * SELECT INTO.  We assume that the correct result tuple type has already
 * been placed in queryDesc->tupDesc.
 */
static void
OpenIntoRel(QueryDesc *queryDesc)
{
	IntoClause *into = queryDesc->plannedstmt->into;
	EState	   *estate = queryDesc->estate;
	Relation	intoRelationDesc;
	char	   *intoName;
	Oid			namespaceId;
	Oid			tablespaceId;
	Datum		reloptions;
	AclResult	aclresult;
	Oid			intoRelationId;
	TupleDesc	tupdesc;
	DR_intorel *myState;

	Assert(into);

	/*
	 * Check consistency of arguments
	 */
	if (into->onCommit != ONCOMMIT_NOOP && !into->rel->istemp)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
				 errmsg("ON COMMIT can only be used on temporary tables")));

	/*
	 * Find namespace to create in, check its permissions
	 */
	intoName = into->rel->relname;
	namespaceId = RangeVarGetCreationNamespace(into->rel);

	aclresult = pg_namespace_aclcheck(namespaceId, GetUserId(),
									  ACL_CREATE);
	if (aclresult != ACLCHECK_OK)
		aclcheck_error(aclresult, ACL_KIND_NAMESPACE,
					   get_namespace_name(namespaceId));

	/*
	 * Select tablespace to use.  If not specified, use default_tablespace
	 * (which may in turn default to database's default).
	 */
	if (into->tableSpaceName)
	{
		tablespaceId = get_tablespace_oid(into->tableSpaceName);
		if (!OidIsValid(tablespaceId))
			ereport(ERROR,
					(errcode(ERRCODE_UNDEFINED_OBJECT),
					 errmsg("tablespace \"%s\" does not exist",
							into->tableSpaceName)));
	}
	else if (into->rel->istemp)
	{
		tablespaceId = GetTempTablespace();
	}
	else
	{
		tablespaceId = GetDefaultTablespace();
		/* note InvalidOid is OK in this case */
	}

	/* Check permissions except when using the database's default space */
	if (OidIsValid(tablespaceId))
	{
		AclResult	aclresult;

		aclresult = pg_tablespace_aclcheck(tablespaceId, GetUserId(),
										   ACL_CREATE);

		if (aclresult != ACLCHECK_OK)
			aclcheck_error(aclresult, ACL_KIND_TABLESPACE,
						   get_tablespace_name(tablespaceId));
	}

	/* Parse and validate any reloptions */
	reloptions = transformRelOptions((Datum) 0,
									 into->options,
									 true,
									 false);
	(void) heap_reloptions(RELKIND_RELATION, reloptions, true);

	/* have to copy the actual tupdesc to get rid of any constraints */
	tupdesc = CreateTupleDescCopy(queryDesc->tupDesc);

	/* Now we can actually create the new relation */
	intoRelationId = heap_create_with_catalog(intoName,
											  namespaceId,
											  tablespaceId,
											  InvalidOid,
											  GetUserId(),
											  tupdesc,
											  RELKIND_RELATION,
											  false,
											  true,
											  0,
											  into->onCommit,
											  reloptions,
											  allowSystemTableMods);

	FreeTupleDesc(tupdesc);

	/*
	 * Advance command counter so that the newly-created relation's catalog
	 * tuples will be visible to heap_open.
	 */
	CommandCounterIncrement();

	/*
	 * If necessary, create a TOAST table for the INTO relation. Note that
	 * AlterTableCreateToastTable ends with CommandCounterIncrement(), so that
	 * the TOAST table will be visible for insertion.
	 */
	AlterTableCreateToastTable(intoRelationId);

	/*
	 * And open the constructed table for writing.
	 */
	intoRelationDesc = heap_open(intoRelationId, AccessExclusiveLock);

	/* use_wal off requires rd_targblock be initially invalid */
	Assert(intoRelationDesc->rd_targblock == InvalidBlockNumber);

	/*
	 * We can skip WAL-logging the insertions, unless PITR is in use.
	 *
	 * Note that for a non-temp INTO table, this is safe only because we know
	 * that the catalog changes above will have been WAL-logged, and so
	 * RecordTransactionCommit will think it needs to WAL-log the eventual
	 * transaction commit.  Else the commit might be lost, even though all the
	 * data is safely fsync'd ...
	 */
	estate->es_into_relation_use_wal = XLogArchivingActive();
	estate->es_into_relation_descriptor = intoRelationDesc;

	/*
	 * Now replace the query's DestReceiver with one for SELECT INTO
	 */
	queryDesc->dest = CreateDestReceiver(DestIntoRel, NULL);
	myState = (DR_intorel *) queryDesc->dest;
	Assert(myState->pub.mydest == DestIntoRel);
	myState->estate = estate;
}

/*
 * CloseIntoRel --- clean up SELECT INTO at ExecutorEnd time
 */
static void
CloseIntoRel(QueryDesc *queryDesc)
{
	EState	   *estate = queryDesc->estate;

	/* OpenIntoRel might never have gotten called */
	if (estate->es_into_relation_descriptor)
	{
		/*
		 * If we skipped using WAL, and it's not a temp relation, we must
		 * force the relation down to disk before it's safe to commit the
		 * transaction.  This requires forcing out any dirty buffers and then
		 * doing a forced fsync.
		 */
		if (!estate->es_into_relation_use_wal &&
			!estate->es_into_relation_descriptor->rd_istemp)
			heap_sync(estate->es_into_relation_descriptor);

		/* close rel, but keep lock until commit */
		heap_close(estate->es_into_relation_descriptor, NoLock);

		estate->es_into_relation_descriptor = NULL;
	}
}

/*
 * CreateIntoRelDestReceiver -- create a suitable DestReceiver object
 *
 * Since CreateDestReceiver doesn't accept the parameters we'd need,
 * we just leave the private fields empty here.  OpenIntoRel will
 * fill them in.
 */
DestReceiver *
CreateIntoRelDestReceiver(void)
{
	DR_intorel *self = (DR_intorel *) palloc(sizeof(DR_intorel));

	self->pub.receiveSlot = intorel_receive;
	self->pub.rStartup = intorel_startup;
	self->pub.rShutdown = intorel_shutdown;
	self->pub.rDestroy = intorel_destroy;
	self->pub.mydest = DestIntoRel;

	self->estate = NULL;

	return (DestReceiver *) self;
}

/*
 * intorel_startup --- executor startup
 */
static void
intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
{
	/* no-op */
}

/*
 * intorel_receive --- receive one tuple
 */
static void
intorel_receive(TupleTableSlot *slot, DestReceiver *self)
{
	DR_intorel *myState = (DR_intorel *) self;
	EState	   *estate = myState->estate;
	HeapTuple	tuple;

	tuple = ExecCopySlotTuple(slot);

	heap_insert(estate->es_into_relation_descriptor,
				tuple,
				estate->es_snapshot->curcid,
				estate->es_into_relation_use_wal,
				false);			/* never any point in using FSM */

	/* We know this is a newly created relation, so there are no indexes */

	heap_freetuple(tuple);

	IncrAppended();
}

/*
 * intorel_shutdown --- executor end
 */
static void
intorel_shutdown(DestReceiver *self)
{
	/* no-op */
}

/*
 * intorel_destroy --- release DestReceiver object
 */
static void
intorel_destroy(DestReceiver *self)
{
	pfree(self);
}