/*-------------------------------------------------------------------------
 *
 * execMain.c
 *	  top level executor interface routines
 *
 * INTERFACE ROUTINES
 *	ExecutorStart()
 *	ExecutorRun()
 *	ExecutorEnd()
 *
 *	The old ExecutorMain() has been replaced by ExecutorStart(),
 *	ExecutorRun() and ExecutorEnd()
 *
 *	These three procedures are the external interfaces to the executor.
 *	In each case, the query descriptor is required as an argument.
 *
 *	ExecutorStart() must be called at the beginning of execution of any
 *	query plan and ExecutorEnd() should always be called at the end of
 *	execution of a plan.
 *
 *	ExecutorRun accepts direction and count arguments that specify whether
 *	the plan is to be executed forwards, backwards, and for how many tuples.
 *
 * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/backend/executor/execMain.c,v 1.292 2007/03/29 00:15:38 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
33 34
#include "postgres.h"

35
#include "access/heapam.h"
36
#include "access/reloptions.h"
37 38
#include "access/transam.h"
#include "access/xact.h"
39
#include "catalog/heap.h"
40
#include "catalog/namespace.h"
41
#include "catalog/toasting.h"
42
#include "commands/tablespace.h"
43
#include "commands/trigger.h"
B
Bruce Momjian 已提交
44
#include "executor/execdebug.h"
45
#include "executor/instrument.h"
46
#include "executor/nodeSubplan.h"
B
Bruce Momjian 已提交
47
#include "miscadmin.h"
48
#include "optimizer/clauses.h"
49
#include "parser/parse_clause.h"
50
#include "parser/parsetree.h"
51
#include "storage/smgr.h"
B
Bruce Momjian 已提交
52
#include "utils/acl.h"
53
#include "utils/lsyscache.h"
54
#include "utils/memutils.h"
55

56

57 58 59 60 61 62 63 64 65
/*
 * evalPlanQual -- state for one level of READ COMMITTED "plan qual"
 * re-evaluation (rechecking a plan against a newer version of a target
 * tuple).  Entries form a stack of active rechecks, plus a recycle list
 * of no-longer-active entries kept for reuse.
 */
typedef struct evalPlanQual
{
	Index		rti;			/* range table index of the target relation */
	EState	   *estate;			/* sub-EState used for this recheck */
	PlanState  *planstate;		/* plan tree initialized for this recheck */
	struct evalPlanQual *next;	/* stack of active PlanQual plans */
	struct evalPlanQual *free;	/* list of free PlanQual plans */
} evalPlanQual;

66
/* decls for local routines only used within this module */
static void InitPlan(QueryDesc *queryDesc, int eflags);
static void initResultRelInfo(ResultRelInfo *resultRelInfo,
				  Index resultRelationIndex,
				  List *rangeTable,
				  CmdType operation,
				  bool doInstrument);
static void ExecEndPlan(PlanState *planstate, EState *estate);
/* top-level plan driver and per-operation tuple handlers */
static TupleTableSlot *ExecutePlan(EState *estate, PlanState *planstate,
			CmdType operation,
			long numberTuples,
			ScanDirection direction,
			DestReceiver *dest);
static void ExecSelect(TupleTableSlot *slot,
		   DestReceiver *dest, EState *estate);
static void ExecInsert(TupleTableSlot *slot, ItemPointer tupleid,
		   TupleTableSlot *planSlot,
		   DestReceiver *dest, EState *estate);
static void ExecDelete(ItemPointer tupleid,
		   TupleTableSlot *planSlot,
		   DestReceiver *dest, EState *estate);
static void ExecUpdate(TupleTableSlot *slot, ItemPointer tupleid,
		   TupleTableSlot *planSlot,
		   DestReceiver *dest, EState *estate);
static void ExecProcessReturning(ProjectionInfo *projectReturning,
					 TupleTableSlot *tupleSlot,
					 TupleTableSlot *planSlot,
					 DestReceiver *dest);
/* READ COMMITTED update-chain recheck (EvalPlanQual) support */
static TupleTableSlot *EvalPlanQualNext(EState *estate);
static void EndEvalPlanQual(EState *estate);
/* permissions / read-only transaction checks */
static void ExecCheckRTPerms(List *rangeTable);
static void ExecCheckRTEPerms(RangeTblEntry *rte);
static void ExecCheckXactReadOnly(PlannedStmt *plannedstmt);
static void EvalPlanQualStart(evalPlanQual *epq, EState *estate,
				  evalPlanQual *priorepq);
static void EvalPlanQualStop(evalPlanQual *epq);
/* SELECT INTO support (DestReceiver implementation for the new rel) */
static void OpenIntoRel(QueryDesc *queryDesc);
static void CloseIntoRel(QueryDesc *queryDesc);
static void intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo);
static void intorel_receive(TupleTableSlot *slot, DestReceiver *self);
static void intorel_shutdown(DestReceiver *self);
static void intorel_destroy(DestReceiver *self);

/* end of local decls */

111

112
/* ----------------------------------------------------------------
 *		ExecutorStart
 *
 *		This routine must be called at the beginning of any execution of any
 *		query plan
 *
 * Takes a QueryDesc previously created by CreateQueryDesc (it's not real
 * clear why we bother to separate the two functions, but...).	The tupDesc
 * field of the QueryDesc is filled in to describe the tuples that will be
 * returned, and the internal fields (estate and planstate) are set up.
 *
 * eflags contains flag bits as described in executor.h.
 *
 * NB: the CurrentMemoryContext when this is called will become the parent
 * of the per-query context used for this Executor invocation.
 * ----------------------------------------------------------------
 */
void
ExecutorStart(QueryDesc *queryDesc, int eflags)
{
	EState	   *estate;
	MemoryContext oldcontext;

	/* sanity checks: queryDesc must not be started already */
	Assert(queryDesc != NULL);
	Assert(queryDesc->estate == NULL);

	/*
	 * If the transaction is read-only, we need to check if any writes are
	 * planned to non-temporary tables.  EXPLAIN is considered read-only.
	 */
	if (XactReadOnly && !(eflags & EXEC_FLAG_EXPLAIN_ONLY))
		ExecCheckXactReadOnly(queryDesc->plannedstmt);

	/*
	 * Build EState, switch into per-query memory context for startup.
	 */
	estate = CreateExecutorState();
	queryDesc->estate = estate;

	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	/*
	 * Fill in parameters, if any, from queryDesc
	 */
	estate->es_param_list_info = queryDesc->params;

	/* allocate workspace for executor-internal (PARAM_EXEC) parameters */
	if (queryDesc->plannedstmt->nParamExec > 0)
		estate->es_param_exec_vals = (ParamExecData *)
			palloc0(queryDesc->plannedstmt->nParamExec * sizeof(ParamExecData));

	/*
	 * Copy other important information into the EState
	 */
	estate->es_snapshot = queryDesc->snapshot;
	estate->es_crosscheck_snapshot = queryDesc->crosscheck_snapshot;
	estate->es_instrument = queryDesc->doInstrument;

	/*
	 * Initialize the plan state tree
	 */
	InitPlan(queryDesc, eflags);

	MemoryContextSwitchTo(oldcontext);
}

/* ----------------------------------------------------------------
 *		ExecutorRun
 *
 *		This is the main routine of the executor module. It accepts
 *		the query descriptor from the traffic cop and executes the
 *		query plan.
 *
 *		ExecutorStart must have been called already.
 *
 *		If direction is NoMovementScanDirection then nothing is done
 *		except to start up/shut down the destination.  Otherwise,
 *		we retrieve up to 'count' tuples in the specified direction.
 *
 *		Note: count = 0 is interpreted as no portal limit, i.e., run to
 *		completion.
 *
 * ----------------------------------------------------------------
 */
TupleTableSlot *
ExecutorRun(QueryDesc *queryDesc,
			ScanDirection direction, long count)
{
	EState	   *estate;
	CmdType		operation;
	DestReceiver *dest;
	bool		sendTuples;
	TupleTableSlot *result;
	MemoryContext oldcontext;

	/* sanity checks */
	Assert(queryDesc != NULL);

	estate = queryDesc->estate;

	Assert(estate != NULL);

	/*
	 * Switch into per-query memory context
	 */
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	/*
	 * extract information from the query descriptor and the query feature.
	 */
	operation = queryDesc->operation;
	dest = queryDesc->dest;

	/*
	 * startup tuple receiver, if we will be emitting tuples
	 */
	estate->es_processed = 0;
	estate->es_lastoid = InvalidOid;

	/* SELECT, or any statement with a RETURNING list, emits tuples */
	sendTuples = (operation == CMD_SELECT ||
				  queryDesc->plannedstmt->returningLists);

	if (sendTuples)
		(*dest->rStartup) (dest, operation, queryDesc->tupDesc);

	/*
	 * run plan
	 */
	if (ScanDirectionIsNoMovement(direction))
		result = NULL;
	else
		result = ExecutePlan(estate,
							 queryDesc->planstate,
							 operation,
							 count,
							 direction,
							 dest);

	/*
	 * shutdown tuple receiver, if we started it
	 */
	if (sendTuples)
		(*dest->rShutdown) (dest);

	MemoryContextSwitchTo(oldcontext);

	return result;
}

/* ----------------------------------------------------------------
 *		ExecutorEnd
 *
 *		This routine must be called at the end of execution of any
 *		query plan
 * ----------------------------------------------------------------
 */
void
ExecutorEnd(QueryDesc *queryDesc)
{
	EState	   *estate;
	MemoryContext oldcontext;

	/* sanity checks */
	Assert(queryDesc != NULL);

	estate = queryDesc->estate;

	Assert(estate != NULL);

	/*
	 * Switch into per-query memory context to run ExecEndPlan
	 */
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	ExecEndPlan(queryDesc->planstate, estate);

	/*
	 * Close the SELECT INTO relation if any
	 */
	if (estate->es_select_into)
		CloseIntoRel(queryDesc);

	/*
	 * Must switch out of context before destroying it
	 */
	MemoryContextSwitchTo(oldcontext);

	/*
	 * Release EState and per-query memory context.  This should release
	 * everything the executor has allocated.
	 */
	FreeExecutorState(estate);

	/* Reset queryDesc fields that no longer point to anything */
	queryDesc->tupDesc = NULL;
	queryDesc->estate = NULL;
	queryDesc->planstate = NULL;
}
310

311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346
/* ----------------------------------------------------------------
 *		ExecutorRewind
 *
 *		Reposition an already-started queryDesc so that the plan can
 *		be executed again from its first tuple.
 * ----------------------------------------------------------------
 */
void
ExecutorRewind(QueryDesc *queryDesc)
{
	EState	   *estate;
	MemoryContext saved_cxt;

	/* caller must supply a started query descriptor */
	Assert(queryDesc != NULL);
	estate = queryDesc->estate;
	Assert(estate != NULL);

	/* rescanning a data-modifying statement makes no sense */
	Assert(queryDesc->operation == CMD_SELECT);

	/* all executor work must happen in the per-query memory context */
	saved_cxt = MemoryContextSwitchTo(estate->es_query_cxt);

	/* force the plan tree back to its starting state */
	ExecReScan(queryDesc->planstate, NULL);

	MemoryContextSwitchTo(saved_cxt);
}

347

348 349 350 351
/*
 * ExecCheckRTPerms
 *		Check access permissions for all relations listed in a range table.
 */
352
static void
353
ExecCheckRTPerms(List *rangeTable)
354
{
355
	ListCell   *l;
356

357
	foreach(l, rangeTable)
358
	{
359
		ExecCheckRTEPerms((RangeTblEntry *) lfirst(l));
360 361 362 363 364 365 366 367
	}
}

/*
 * ExecCheckRTEPerms
 *		Check access permissions for a single RTE.
 */
static void
368
ExecCheckRTEPerms(RangeTblEntry *rte)
369
{
370
	AclMode		requiredPerms;
371
	Oid			relOid;
B
Bruce Momjian 已提交
372
	Oid			userid;
373

B
Bruce Momjian 已提交
374
	/*
375
	 * Only plain-relation RTEs need to be checked here.  Function RTEs are
B
Bruce Momjian 已提交
376
	 * checked by init_fcache when the function is prepared for execution.
377
	 * Join, subquery, and special RTEs need no checks.
B
Bruce Momjian 已提交
378
	 */
379
	if (rte->rtekind != RTE_RELATION)
380 381
		return;

382 383 384 385 386 387 388
	/*
	 * No work if requiredPerms is empty.
	 */
	requiredPerms = rte->requiredPerms;
	if (requiredPerms == 0)
		return;

389
	relOid = rte->relid;
390 391

	/*
B
Bruce Momjian 已提交
392
	 * userid to check as: current user unless we have a setuid indication.
393
	 *
394 395 396 397
	 * Note: GetUserId() is presently fast enough that there's no harm in
	 * calling it separately for each RTE.	If that stops being true, we could
	 * call it once in ExecCheckRTPerms and pass the userid down from there.
	 * But for now, no need for the extra clutter.
398
	 */
399
	userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
400

401
	/*
B
Bruce Momjian 已提交
402
	 * We must have *all* the requiredPerms bits, so use aclmask not aclcheck.
403
	 */
404 405 406 407
	if (pg_class_aclmask(relOid, userid, requiredPerms, ACLMASK_ALL)
		!= requiredPerms)
		aclcheck_error(ACLCHECK_NO_PRIV, ACL_KIND_CLASS,
					   get_rel_name(relOid));
408 409
}

410 411 412
/*
 * Check that the query does not imply any writes to non-temp tables.
 */
413
static void
414
ExecCheckXactReadOnly(PlannedStmt *plannedstmt)
415
{
416 417
	ListCell   *l;

418 419 420 421 422
	/*
	 * CREATE TABLE AS or SELECT INTO?
	 *
	 * XXX should we allow this if the destination is temp?
	 */
423
	if (plannedstmt->into != NULL)
424 425
		goto fail;

426
	/* Fail if write permissions are requested on any non-temp table */
427
	foreach(l, plannedstmt->rtable)
428
	{
429
		RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
430

431 432
		if (rte->rtekind != RTE_RELATION)
			continue;
433

434 435
		if ((rte->requiredPerms & (~ACL_SELECT)) == 0)
			continue;
436

437 438
		if (isTempNamespace(get_rel_namespace(rte->relid)))
			continue;
439

440
		goto fail;
441 442 443 444 445
	}

	return;

fail:
446 447 448
	ereport(ERROR,
			(errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION),
			 errmsg("transaction is read-only")));
449 450 451
}


452
/* ----------------------------------------------------------------
 *		InitPlan
 *
 *		Initializes the query plan: open files, allocate storage
 *		and start up the rule manager
 *
 * Fills in queryDesc->tupDesc and queryDesc->planstate as side effects.
 * Must run inside the per-query memory context (caller switches).
 * ----------------------------------------------------------------
 */
static void
InitPlan(QueryDesc *queryDesc, int eflags)
{
	CmdType		operation = queryDesc->operation;
	PlannedStmt *plannedstmt = queryDesc->plannedstmt;
	Plan	   *plan = plannedstmt->planTree;
	List	   *rangeTable = plannedstmt->rtable;
	EState	   *estate = queryDesc->estate;
	PlanState  *planstate;
	TupleDesc	tupType;
	ListCell   *l;
	int			i;

	/*
	 * Do permissions checks
	 */
	ExecCheckRTPerms(rangeTable);

	/*
	 * initialize the node's execution state
	 */
	estate->es_range_table = rangeTable;

	/*
	 * initialize result relation stuff
	 */
	if (plannedstmt->resultRelations)
	{
		List	   *resultRelations = plannedstmt->resultRelations;
		int			numResultRelations = list_length(resultRelations);
		ResultRelInfo *resultRelInfos;
		ResultRelInfo *resultRelInfo;

		/* one ResultRelInfo per target relation (multiple for inherited UPDATE/DELETE) */
		resultRelInfos = (ResultRelInfo *)
			palloc(numResultRelations * sizeof(ResultRelInfo));
		resultRelInfo = resultRelInfos;
		foreach(l, resultRelations)
		{
			initResultRelInfo(resultRelInfo,
							  lfirst_int(l),
							  rangeTable,
							  operation,
							  estate->es_instrument);
			resultRelInfo++;
		}
		estate->es_result_relations = resultRelInfos;
		estate->es_num_result_relations = numResultRelations;
		/* Initialize to first or only result rel */
		estate->es_result_relation_info = resultRelInfos;
	}
	else
	{
		/*
		 * if no result relation, then set state appropriately
		 */
		estate->es_result_relations = NULL;
		estate->es_num_result_relations = 0;
		estate->es_result_relation_info = NULL;
	}

	/*
	 * Detect whether we're doing SELECT INTO.  If so, set the es_into_oids
	 * flag appropriately so that the plan tree will be initialized with the
	 * correct tuple descriptors.  (Other SELECT INTO stuff comes later.)
	 */
	estate->es_select_into = false;
	if (operation == CMD_SELECT && plannedstmt->into != NULL)
	{
		estate->es_select_into = true;
		estate->es_into_oids = interpretOidsOption(plannedstmt->into->options);
	}

	/*
	 * Have to lock relations selected FOR UPDATE/FOR SHARE before we
	 * initialize the plan tree, else we'd be doing a lock upgrade.
	 * While we are at it, build the ExecRowMark list.
	 */
	estate->es_rowMarks = NIL;
	foreach(l, plannedstmt->rowMarks)
	{
		RowMarkClause *rc = (RowMarkClause *) lfirst(l);
		Oid			relid = getrelid(rc->rti, rangeTable);
		Relation	relation;
		ExecRowMark *erm;

		relation = heap_open(relid, RowShareLock);
		erm = (ExecRowMark *) palloc(sizeof(ExecRowMark));
		erm->relation = relation;
		erm->rti = rc->rti;
		erm->forUpdate = rc->forUpdate;
		erm->noWait = rc->noWait;
		/* We'll set up ctidAttno below */
		erm->ctidAttNo = InvalidAttrNumber;
		estate->es_rowMarks = lappend(estate->es_rowMarks, erm);
	}

	/*
	 * Initialize the executor "tuple" table.  We need slots for all the plan
	 * nodes, plus possibly output slots for the junkfilter(s). At this point
	 * we aren't sure if we need junkfilters, so just add slots for them
	 * unconditionally.  Also, if it's not a SELECT, set up a slot for use for
	 * trigger output tuples.  Also, one for RETURNING-list evaluation.
	 */
	{
		int			nSlots;

		/* Slots for the main plan tree */
		nSlots = ExecCountSlotsNode(plan);
		/* Add slots for subplans and initplans */
		foreach(l, plannedstmt->subplans)
		{
			Plan   *subplan = (Plan *) lfirst(l);

			nSlots += ExecCountSlotsNode(subplan);
		}
		/* Add slots for junkfilter(s) */
		if (plannedstmt->resultRelations != NIL)
			nSlots += list_length(plannedstmt->resultRelations);
		else
			nSlots += 1;
		if (operation != CMD_SELECT)
			nSlots++;			/* for es_trig_tuple_slot */
		if (plannedstmt->returningLists)
			nSlots++;			/* for RETURNING projection */

		estate->es_tupleTable = ExecCreateTupleTable(nSlots);

		if (operation != CMD_SELECT)
			estate->es_trig_tuple_slot =
				ExecAllocTableSlot(estate->es_tupleTable);
	}

	/* mark EvalPlanQual not active */
	estate->es_plannedstmt = plannedstmt;
	estate->es_evalPlanQual = NULL;
	estate->es_evTupleNull = NULL;
	estate->es_evTuple = NULL;
	estate->es_useEvalPlan = false;

	/*
	 * Initialize private state information for each SubPlan.  We must do
	 * this before running ExecInitNode on the main query tree, since
	 * ExecInitSubPlan expects to be able to find these entries.
	 */
	Assert(estate->es_subplanstates == NIL);
	i = 1;						/* subplan indices count from 1 */
	foreach(l, plannedstmt->subplans)
	{
		Plan   *subplan = (Plan *) lfirst(l);
		PlanState *subplanstate;
		int		sp_eflags;

		/*
		 * A subplan will never need to do BACKWARD scan nor MARK/RESTORE.
		 * If it is a parameterless subplan (not initplan), we suggest that it
		 * be prepared to handle REWIND efficiently; otherwise there is no
		 * need.
		 */
		sp_eflags = eflags & EXEC_FLAG_EXPLAIN_ONLY;
		if (bms_is_member(i, plannedstmt->rewindPlanIDs))
			sp_eflags |= EXEC_FLAG_REWIND;

		subplanstate = ExecInitNode(subplan, estate, sp_eflags);

		estate->es_subplanstates = lappend(estate->es_subplanstates,
										   subplanstate);

		i++;
	}

	/*
	 * Initialize the private state information for all the nodes in the query
	 * tree.  This opens files, allocates storage and leaves us ready to start
	 * processing tuples.
	 */
	planstate = ExecInitNode(plan, estate, eflags);

	/*
	 * Get the tuple descriptor describing the type of tuples to return. (this
	 * is especially important if we are creating a relation with "SELECT
	 * INTO")
	 */
	tupType = ExecGetResultType(planstate);

	/*
	 * Initialize the junk filter if needed.  SELECT and INSERT queries need a
	 * filter if there are any junk attrs in the tlist.  INSERT and SELECT
	 * INTO also need a filter if the plan may return raw disk tuples (else
	 * heap_insert will be scribbling on the source relation!). UPDATE and
	 * DELETE always need a filter, since there's always a junk 'ctid'
	 * attribute present --- no need to look first.
	 */
	{
		bool		junk_filter_needed = false;
		ListCell   *tlist;

		switch (operation)
		{
			case CMD_SELECT:
			case CMD_INSERT:
				foreach(tlist, plan->targetlist)
				{
					TargetEntry *tle = (TargetEntry *) lfirst(tlist);

					if (tle->resjunk)
					{
						junk_filter_needed = true;
						break;
					}
				}
				if (!junk_filter_needed &&
					(operation == CMD_INSERT || estate->es_select_into) &&
					ExecMayReturnRawTuples(planstate))
					junk_filter_needed = true;
				break;
			case CMD_UPDATE:
			case CMD_DELETE:
				junk_filter_needed = true;
				break;
			default:
				break;
		}

		if (junk_filter_needed)
		{
			/*
			 * If there are multiple result relations, each one needs its own
			 * junk filter.  Note this is only possible for UPDATE/DELETE, so
			 * we can't be fooled by some needing a filter and some not.
			 */
			if (list_length(plannedstmt->resultRelations) > 1)
			{
				PlanState **appendplans;
				int			as_nplans;
				ResultRelInfo *resultRelInfo;

				/* Top plan had better be an Append here. */
				Assert(IsA(plan, Append));
				Assert(((Append *) plan)->isTarget);
				Assert(IsA(planstate, AppendState));
				appendplans = ((AppendState *) planstate)->appendplans;
				as_nplans = ((AppendState *) planstate)->as_nplans;
				Assert(as_nplans == estate->es_num_result_relations);
				resultRelInfo = estate->es_result_relations;
				for (i = 0; i < as_nplans; i++)
				{
					PlanState  *subplan = appendplans[i];
					JunkFilter *j;

					j = ExecInitJunkFilter(subplan->plan->targetlist,
							resultRelInfo->ri_RelationDesc->rd_att->tdhasoid,
								  ExecAllocTableSlot(estate->es_tupleTable));
					/*
					 * Since it must be UPDATE/DELETE, there had better be
					 * a "ctid" junk attribute in the tlist ... but ctid could
					 * be at a different resno for each result relation.
					 * We look up the ctid resnos now and save them in the
					 * junkfilters.
					 */
					j->jf_junkAttNo = ExecFindJunkAttribute(j, "ctid");
					if (!AttributeNumberIsValid(j->jf_junkAttNo))
						elog(ERROR, "could not find junk ctid column");
					resultRelInfo->ri_junkFilter = j;
					resultRelInfo++;
				}

				/*
				 * Set active junkfilter too; at this point ExecInitAppend has
				 * already selected an active result relation...
				 */
				estate->es_junkFilter =
					estate->es_result_relation_info->ri_junkFilter;
			}
			else
			{
				/* Normal case with just one JunkFilter */
				JunkFilter *j;

				j = ExecInitJunkFilter(planstate->plan->targetlist,
									   tupType->tdhasoid,
								  ExecAllocTableSlot(estate->es_tupleTable));
				estate->es_junkFilter = j;
				if (estate->es_result_relation_info)
					estate->es_result_relation_info->ri_junkFilter = j;

				if (operation == CMD_SELECT)
				{
					/* For SELECT, want to return the cleaned tuple type */
					tupType = j->jf_cleanTupType;
					/* For SELECT FOR UPDATE/SHARE, find the ctid attrs now */
					foreach(l, estate->es_rowMarks)
					{
						ExecRowMark *erm = (ExecRowMark *) lfirst(l);
						char		resname[32];

						snprintf(resname, sizeof(resname), "ctid%u", erm->rti);
						erm->ctidAttNo = ExecFindJunkAttribute(j, resname);
						if (!AttributeNumberIsValid(erm->ctidAttNo))
							elog(ERROR, "could not find junk \"%s\" column",
								 resname);
					}
				}
				else if (operation == CMD_UPDATE || operation == CMD_DELETE)
				{
					/* For UPDATE/DELETE, find the ctid junk attr now */
					j->jf_junkAttNo = ExecFindJunkAttribute(j, "ctid");
					if (!AttributeNumberIsValid(j->jf_junkAttNo))
						elog(ERROR, "could not find junk ctid column");
				}
			}
		}
		else
			estate->es_junkFilter = NULL;
	}

	/*
	 * Initialize RETURNING projections if needed.
	 */
	if (plannedstmt->returningLists)
	{
		TupleTableSlot *slot;
		ExprContext *econtext;
		ResultRelInfo *resultRelInfo;

		/*
		 * We set QueryDesc.tupDesc to be the RETURNING rowtype in this case.
		 * We assume all the sublists will generate the same output tupdesc.
		 */
		tupType = ExecTypeFromTL((List *) linitial(plannedstmt->returningLists),
								 false);

		/* Set up a slot for the output of the RETURNING projection(s) */
		slot = ExecAllocTableSlot(estate->es_tupleTable);
		ExecSetSlotDescriptor(slot, tupType);
		/* Need an econtext too */
		econtext = CreateExprContext(estate);

		/*
		 * Build a projection for each result rel.	Note that any SubPlans in
		 * the RETURNING lists get attached to the topmost plan node.
		 */
		Assert(list_length(plannedstmt->returningLists) == estate->es_num_result_relations);
		resultRelInfo = estate->es_result_relations;
		foreach(l, plannedstmt->returningLists)
		{
			List	   *rlist = (List *) lfirst(l);
			List	   *rliststate;

			rliststate = (List *) ExecInitExpr((Expr *) rlist, planstate);
			resultRelInfo->ri_projectReturning =
				ExecBuildProjectionInfo(rliststate, econtext, slot,
									   resultRelInfo->ri_RelationDesc->rd_att);
			resultRelInfo++;
		}
	}

	queryDesc->tupDesc = tupType;
	queryDesc->planstate = planstate;

	/*
	 * If doing SELECT INTO, initialize the "into" relation.  We must wait
	 * till now so we have the "clean" result tuple type to create the new
	 * table from.
	 *
	 * If EXPLAIN, skip creating the "into" relation.
	 */
	if (estate->es_select_into && !(eflags & EXEC_FLAG_EXPLAIN_ONLY))
		OpenIntoRel(queryDesc);
}

829 830 831 832 833 834 835
/*
 * Initialize ResultRelInfo data for one result relation
 *
 * Opens (and thereby locks, RowExclusiveLock) the target relation, rejects
 * relation kinds that cannot be modified, and fills in trigger and index
 * bookkeeping.  The relation stays open until ExecEndPlan.
 */
static void
initResultRelInfo(ResultRelInfo *resultRelInfo,
				  Index resultRelationIndex,
				  List *rangeTable,
				  CmdType operation,
				  bool doInstrument)
{
	Oid			resultRelationOid;
	Relation	resultRelationDesc;

	resultRelationOid = getrelid(resultRelationIndex, rangeTable);
	resultRelationDesc = heap_open(resultRelationOid, RowExclusiveLock);

	/* reject relation kinds that can't be INSERT/UPDATE/DELETE targets */
	switch (resultRelationDesc->rd_rel->relkind)
	{
		case RELKIND_SEQUENCE:
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot change sequence \"%s\"",
							RelationGetRelationName(resultRelationDesc))));
			break;
		case RELKIND_TOASTVALUE:
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot change TOAST relation \"%s\"",
							RelationGetRelationName(resultRelationDesc))));
			break;
		case RELKIND_VIEW:
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot change view \"%s\"",
							RelationGetRelationName(resultRelationDesc))));
			break;
	}

	MemSet(resultRelInfo, 0, sizeof(ResultRelInfo));
	resultRelInfo->type = T_ResultRelInfo;
	resultRelInfo->ri_RangeTableIndex = resultRelationIndex;
	resultRelInfo->ri_RelationDesc = resultRelationDesc;
	resultRelInfo->ri_NumIndices = 0;
	resultRelInfo->ri_IndexRelationDescs = NULL;
	resultRelInfo->ri_IndexRelationInfo = NULL;
	/* make a copy so as not to depend on relcache info not changing... */
	resultRelInfo->ri_TrigDesc = CopyTriggerDesc(resultRelationDesc->trigdesc);
	if (resultRelInfo->ri_TrigDesc)
	{
		int			n = resultRelInfo->ri_TrigDesc->numtriggers;

		/* per-trigger fmgr lookup cache, filled lazily at fire time */
		resultRelInfo->ri_TrigFunctions = (FmgrInfo *)
			palloc0(n * sizeof(FmgrInfo));
		if (doInstrument)
			resultRelInfo->ri_TrigInstrument = InstrAlloc(n);
		else
			resultRelInfo->ri_TrigInstrument = NULL;
	}
	else
	{
		resultRelInfo->ri_TrigFunctions = NULL;
		resultRelInfo->ri_TrigInstrument = NULL;
	}
	resultRelInfo->ri_ConstraintExprs = NULL;
	resultRelInfo->ri_junkFilter = NULL;
	resultRelInfo->ri_projectReturning = NULL;

	/*
	 * If there are indices on the result relation, open them and save
	 * descriptors in the result relation info, so that we can add new index
	 * entries for the tuples we add/update.  We need not do this for a
	 * DELETE, however, since deletion doesn't affect indexes.
	 */
	if (resultRelationDesc->rd_rel->relhasindex &&
		operation != CMD_DELETE)
		ExecOpenIndices(resultRelInfo);
}

907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963
/*
 *		ExecContextForcesOids
 *
 * When doing INSERT, UPDATE, or SELECT INTO, result tuples must have space
 * for an OID exactly when the destination relation has OIDs; in other
 * contexts the executor may freely choose (it usually doesn't want OID
 * space, except when a physical-tlist optimization applies).  Returns TRUE
 * with *hasoids set when the plan context forces the choice, FALSE when
 * the executor may decide for itself.
 *
 * Note the requirement propagates to every node in the plan tree, since
 * pass-through nodes (e.g. Sort) just return their input tuples.  For an
 * inherited UPDATE the target relations may differ in having OIDs, so the
 * answer is per-result-relation, as set up while initializing each child
 * of the topmost Append plan.
 *
 * For SELECT INTO the destination relation doesn't exist yet, so we rely
 * on the es_into_oids flag recorded earlier by InitPlan().
 */
bool
ExecContextForcesOids(PlanState *planstate, bool *hasoids)
{
	EState	   *estate = planstate->state;

	/* SELECT INTO: InitPlan() already decided for us */
	if (estate->es_select_into)
	{
		*hasoids = estate->es_into_oids;
		return true;
	}

	/* INSERT/UPDATE: consult the current target relation, if any */
	if (estate->es_result_relation_info != NULL)
	{
		Relation	target_rel = estate->es_result_relation_info->ri_RelationDesc;

		if (target_rel != NULL)
		{
			*hasoids = target_rel->rd_rel->relhasoids;
			return true;
		}
	}

	/* no forcing context */
	return false;
}

964
/* ----------------------------------------------------------------
 *		ExecEndPlan
 *
 *		Cleans up the query plan -- closes files and frees up storage
 *
 * NOTE: we are no longer very worried about freeing storage per se
 * in this code; FreeExecutorState should be guaranteed to release all
 * memory that needs to be released.  What we are worried about doing
 * is closing relations and dropping buffer pins.  Thus, for example,
 * tuple tables must be cleared or dropped to ensure pins are released.
 * ----------------------------------------------------------------
 */
static void
ExecEndPlan(PlanState *planstate, EState *estate)
{
	ResultRelInfo *resultRelInfo;
	int			i;
	ListCell   *l;

	/*
	 * shut down any PlanQual processing we were doing
	 */
	if (estate->es_evalPlanQual != NULL)
		EndEvalPlanQual(estate);

	/*
	 * shut down the node-type-specific query processing
	 */
	ExecEndNode(planstate);

	/*
	 * for subplans too
	 */
	foreach(l, estate->es_subplanstates)
	{
		PlanState *subplanstate = (PlanState *) lfirst(l);

		ExecEndNode(subplanstate);
	}

	/*
	 * destroy the executor "tuple" table.
	 */
	ExecDropTupleTable(estate->es_tupleTable, true);
	estate->es_tupleTable = NULL;

	/*
	 * close the result relation(s) if any, but hold locks until xact commit.
	 */
	resultRelInfo = estate->es_result_relations;
	for (i = estate->es_num_result_relations; i > 0; i--)
	{
		/* Close indices and then the relation itself */
		ExecCloseIndices(resultRelInfo);
		heap_close(resultRelInfo->ri_RelationDesc, NoLock);
		resultRelInfo++;
	}

	/*
	 * close any relations selected FOR UPDATE/FOR SHARE, again keeping locks
	 */
	foreach(l, estate->es_rowMarks)
	{
		ExecRowMark *erm = lfirst(l);

		heap_close(erm->relation, NoLock);
	}
}

/* ----------------------------------------------------------------
1034 1035
 *		ExecutePlan
 *
1036
 *		processes the query plan to retrieve 'numberTuples' tuples in the
1037
 *		direction specified.
1038
 *
1039
 *		Retrieves all tuples if numberTuples is 0
1040
 *
1041
 *		result is either a slot containing the last tuple in the case
1042
 *		of a SELECT or NULL otherwise.
1043
 *
1044 1045
 * Note: the ctid attribute is a 'junk' attribute that is removed before the
 * user can see it
1046 1047 1048
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
1049
ExecutePlan(EState *estate,
1050
			PlanState *planstate,
1051
			CmdType operation,
1052
			long numberTuples,
1053
			ScanDirection direction,
1054
			DestReceiver *dest)
1055
{
B
Bruce Momjian 已提交
1056
	JunkFilter *junkfilter;
1057
	TupleTableSlot *planSlot;
B
Bruce Momjian 已提交
1058 1059 1060 1061 1062
	TupleTableSlot *slot;
	ItemPointer tupleid = NULL;
	ItemPointerData tuple_ctid;
	long		current_tuple_count;
	TupleTableSlot *result;
1063

B
Bruce Momjian 已提交
1064
	/*
B
Bruce Momjian 已提交
1065
	 * initialize local variables
1066
	 */
1067 1068 1069
	current_tuple_count = 0;
	result = NULL;

B
Bruce Momjian 已提交
1070 1071
	/*
	 * Set the direction.
1072
	 */
1073 1074
	estate->es_direction = direction;

1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090
	/*
	 * Process BEFORE EACH STATEMENT triggers
	 */
	switch (operation)
	{
		case CMD_UPDATE:
			ExecBSUpdateTriggers(estate, estate->es_result_relation_info);
			break;
		case CMD_DELETE:
			ExecBSDeleteTriggers(estate, estate->es_result_relation_info);
			break;
		case CMD_INSERT:
			ExecBSInsertTriggers(estate, estate->es_result_relation_info);
			break;
		default:
			/* do nothing */
1091
			break;
1092 1093
	}

B
Bruce Momjian 已提交
1094
	/*
B
Bruce Momjian 已提交
1095
	 * Loop until we've processed the proper number of tuples from the plan.
1096 1097 1098 1099
	 */

	for (;;)
	{
1100 1101
		/* Reset the per-output-tuple exprcontext */
		ResetPerTupleExprContext(estate);
B
Bruce Momjian 已提交
1102

B
Bruce Momjian 已提交
1103
		/*
B
Bruce Momjian 已提交
1104
		 * Execute the plan and obtain a tuple
1105
		 */
B
Bruce Momjian 已提交
1106
lnext:	;
1107 1108
		if (estate->es_useEvalPlan)
		{
1109 1110 1111
			planSlot = EvalPlanQualNext(estate);
			if (TupIsNull(planSlot))
				planSlot = ExecProcNode(planstate);
1112 1113
		}
		else
1114
			planSlot = ExecProcNode(planstate);
1115

B
Bruce Momjian 已提交
1116
		/*
B
Bruce Momjian 已提交
1117 1118
		 * if the tuple is null, then we assume there is nothing more to
		 * process so we just return null...
1119
		 */
1120
		if (TupIsNull(planSlot))
1121 1122 1123
		{
			result = NULL;
			break;
1124
		}
1125
		slot = planSlot;
1126

B
Bruce Momjian 已提交
1127
		/*
B
Bruce Momjian 已提交
1128 1129
		 * if we have a junk filter, then project a new tuple with the junk
		 * removed.
1130
		 *
1131
		 * Store this new "clean" tuple in the junkfilter's resultSlot.
B
Bruce Momjian 已提交
1132 1133
		 * (Formerly, we stored it back over the "dirty" tuple, which is WRONG
		 * because that tuple slot has the wrong descriptor.)
1134
		 *
B
Bruce Momjian 已提交
1135
		 * Also, extract all the junk information we need.
1136
		 */
1137
		if ((junkfilter = estate->es_junkFilter) != NULL)
1138
		{
1139 1140
			Datum		datum;
			bool		isNull;
1141

B
Bruce Momjian 已提交
1142
			/*
1143 1144 1145 1146
			 * extract the 'ctid' junk attribute.
			 */
			if (operation == CMD_UPDATE || operation == CMD_DELETE)
			{
1147 1148
				datum = ExecGetJunkAttribute(slot, junkfilter->jf_junkAttNo,
											 &isNull);
1149
				/* shouldn't ever get a null result... */
1150
				if (isNull)
1151
					elog(ERROR, "ctid is NULL");
1152 1153

				tupleid = (ItemPointer) DatumGetPointer(datum);
B
Bruce Momjian 已提交
1154
				tuple_ctid = *tupleid;	/* make sure we don't free the ctid!! */
1155 1156
				tupleid = &tuple_ctid;
			}
B
Bruce Momjian 已提交
1157

1158 1159 1160
			/*
			 * Process any FOR UPDATE or FOR SHARE locking requested.
			 */
1161
			else if (estate->es_rowMarks != NIL)
1162
			{
1163
				ListCell   *l;
1164

B
Bruce Momjian 已提交
1165
		lmark:	;
1166
				foreach(l, estate->es_rowMarks)
1167
				{
1168
					ExecRowMark *erm = lfirst(l);
1169
					HeapTupleData tuple;
1170 1171 1172
					Buffer		buffer;
					ItemPointerData update_ctid;
					TransactionId update_xmax;
1173
					TupleTableSlot *newSlot;
B
Bruce Momjian 已提交
1174 1175
					LockTupleMode lockmode;
					HTSU_Result test;
1176

1177 1178 1179
					datum = ExecGetJunkAttribute(slot,
												 erm->ctidAttNo,
												 &isNull);
1180
					/* shouldn't ever get a null result... */
1181
					if (isNull)
1182
						elog(ERROR, "ctid is NULL");
1183

1184 1185
					tuple.t_self = *((ItemPointer) DatumGetPointer(datum));

1186
					if (erm->forUpdate)
1187 1188 1189 1190 1191
						lockmode = LockTupleExclusive;
					else
						lockmode = LockTupleShared;

					test = heap_lock_tuple(erm->relation, &tuple, &buffer,
1192 1193
										   &update_ctid, &update_xmax,
										   estate->es_snapshot->curcid,
1194
										   lockmode, erm->noWait);
1195 1196 1197 1198
					ReleaseBuffer(buffer);
					switch (test)
					{
						case HeapTupleSelfUpdated:
1199 1200 1201
							/* treat it as deleted; do not process */
							goto lnext;

1202 1203 1204 1205
						case HeapTupleMayBeUpdated:
							break;

						case HeapTupleUpdated:
1206
							if (IsXactIsoLevelSerializable)
1207
								ereport(ERROR,
B
Bruce Momjian 已提交
1208 1209
								 (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
								  errmsg("could not serialize access due to concurrent update")));
1210 1211
							if (!ItemPointerEquals(&update_ctid,
												   &tuple.t_self))
1212
							{
1213 1214 1215 1216
								/* updated, so look at updated version */
								newSlot = EvalPlanQual(estate,
													   erm->rti,
													   &update_ctid,
1217
													   update_xmax,
B
Bruce Momjian 已提交
1218
												estate->es_snapshot->curcid);
1219
								if (!TupIsNull(newSlot))
1220
								{
1221
									slot = planSlot = newSlot;
1222 1223 1224 1225
									estate->es_useEvalPlan = true;
									goto lmark;
								}
							}
B
Bruce Momjian 已提交
1226 1227 1228

							/*
							 * if tuple was deleted or PlanQual failed for
B
Bruce Momjian 已提交
1229
							 * updated tuple - we must not return this tuple!
1230 1231
							 */
							goto lnext;
1232 1233

						default:
1234
							elog(ERROR, "unrecognized heap_lock_tuple status: %u",
1235
								 test);
1236
							return NULL;
1237 1238 1239
					}
				}
			}
1240

B
Bruce Momjian 已提交
1241
			/*
B
Bruce Momjian 已提交
1242 1243 1244
			 * Create a new "clean" tuple with all junk attributes removed. We
			 * don't need to do this for DELETE, however (there will in fact
			 * be no non-junk attributes in a DELETE!)
1245
			 */
1246 1247
			if (operation != CMD_DELETE)
				slot = ExecFilterJunk(junkfilter, slot);
1248
		}
1249

B
Bruce Momjian 已提交
1250
		/*
B
Bruce Momjian 已提交
1251 1252 1253
		 * now that we have a tuple, do the appropriate thing with it.. either
		 * return it to the user, add it to a relation someplace, delete it
		 * from a relation, or modify some of its attributes.
1254 1255 1256
		 */
		switch (operation)
		{
1257
			case CMD_SELECT:
1258
				ExecSelect(slot, dest, estate);
1259 1260
				result = slot;
				break;
1261

1262
			case CMD_INSERT:
1263
				ExecInsert(slot, tupleid, planSlot, dest, estate);
1264 1265
				result = NULL;
				break;
1266

1267
			case CMD_DELETE:
1268
				ExecDelete(tupleid, planSlot, dest, estate);
1269 1270
				result = NULL;
				break;
1271

1272
			case CMD_UPDATE:
1273
				ExecUpdate(slot, tupleid, planSlot, dest, estate);
1274 1275
				result = NULL;
				break;
1276

1277
			default:
1278 1279
				elog(ERROR, "unrecognized operation code: %d",
					 (int) operation);
1280
				result = NULL;
1281
				break;
1282
		}
B
Bruce Momjian 已提交
1283

B
Bruce Momjian 已提交
1284
		/*
B
Bruce Momjian 已提交
1285 1286 1287
		 * check our tuple count.. if we've processed the proper number then
		 * quit, else loop again and process more tuples.  Zero numberTuples
		 * means no limit.
1288
		 */
1289
		current_tuple_count++;
1290
		if (numberTuples && numberTuples == current_tuple_count)
1291
			break;
1292
	}
1293

1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309
	/*
	 * Process AFTER EACH STATEMENT triggers
	 */
	switch (operation)
	{
		case CMD_UPDATE:
			ExecASUpdateTriggers(estate, estate->es_result_relation_info);
			break;
		case CMD_DELETE:
			ExecASDeleteTriggers(estate, estate->es_result_relation_info);
			break;
		case CMD_INSERT:
			ExecASInsertTriggers(estate, estate->es_result_relation_info);
			break;
		default:
			/* do nothing */
1310
			break;
1311 1312
	}

B
Bruce Momjian 已提交
1313
	/*
B
Bruce Momjian 已提交
1314
	 * here, result is either a slot containing a tuple in the case of a
1315
	 * SELECT or NULL otherwise.
1316
	 */
1317
	return result;
1318 1319 1320
}

/* ----------------------------------------------------------------
1321
 *		ExecSelect
1322
 *
1323
 *		SELECTs are easy.. we just pass the tuple to the appropriate
1324
 *		output function.
1325 1326 1327
 * ----------------------------------------------------------------
 */
static void
1328
ExecSelect(TupleTableSlot *slot,
1329
		   DestReceiver *dest,
1330
		   EState *estate)
1331
{
1332
	(*dest->receiveSlot) (slot, dest);
1333 1334
	IncrRetrieved();
	(estate->es_processed)++;
1335 1336 1337
}

/* ----------------------------------------------------------------
1338
 *		ExecInsert
1339
 *
1340
 *		INSERTs are trickier.. we have to insert the tuple into
1341 1342
 *		the base relation and insert appropriate tuples into the
 *		index relations.
1343 1344 1345
 * ----------------------------------------------------------------
 */
static void
1346
ExecInsert(TupleTableSlot *slot,
1347
		   ItemPointer tupleid,
1348 1349
		   TupleTableSlot *planSlot,
		   DestReceiver *dest,
1350
		   EState *estate)
1351
{
1352
	HeapTuple	tuple;
1353
	ResultRelInfo *resultRelInfo;
1354 1355
	Relation	resultRelationDesc;
	Oid			newId;
1356

B
Bruce Momjian 已提交
1357
	/*
B
Bruce Momjian 已提交
1358 1359
	 * get the heap tuple out of the tuple table slot, making sure we have a
	 * writable copy
1360
	 */
1361
	tuple = ExecMaterializeSlot(slot);
1362

B
Bruce Momjian 已提交
1363
	/*
1364
	 * get information on the (current) result relation
1365
	 */
1366 1367
	resultRelInfo = estate->es_result_relation_info;
	resultRelationDesc = resultRelInfo->ri_RelationDesc;
1368 1369

	/* BEFORE ROW INSERT Triggers */
1370
	if (resultRelInfo->ri_TrigDesc &&
B
Bruce Momjian 已提交
1371
		resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_INSERT] > 0)
1372
	{
1373
		HeapTuple	newtuple;
1374

1375
		newtuple = ExecBRInsertTriggers(estate, resultRelInfo, tuple);
1376 1377 1378 1379 1380 1381

		if (newtuple == NULL)	/* "do nothing" */
			return;

		if (newtuple != tuple)	/* modified by Trigger(s) */
		{
1382
			/*
1383 1384
			 * Put the modified tuple into a slot for convenience of routines
			 * below.  We assume the tuple was allocated in per-tuple memory
B
Bruce Momjian 已提交
1385 1386
			 * context, and therefore will go away by itself. The tuple table
			 * slot should not try to clear it.
1387
			 */
1388 1389 1390
			TupleTableSlot *newslot = estate->es_trig_tuple_slot;

			if (newslot->tts_tupleDescriptor != slot->tts_tupleDescriptor)
1391
				ExecSetSlotDescriptor(newslot, slot->tts_tupleDescriptor);
1392 1393
			ExecStoreTuple(newtuple, newslot, InvalidBuffer, false);
			slot = newslot;
1394
			tuple = newtuple;
1395 1396 1397
		}
	}

B
Bruce Momjian 已提交
1398
	/*
1399
	 * Check the constraints of the tuple
1400 1401
	 */
	if (resultRelationDesc->rd_att->constr)
1402
		ExecConstraints(resultRelInfo, slot, estate);
1403

B
Bruce Momjian 已提交
1404
	/*
B
Bruce Momjian 已提交
1405
	 * insert the tuple
1406
	 *
B
Bruce Momjian 已提交
1407 1408
	 * Note: heap_insert returns the tid (location) of the new tuple in the
	 * t_self field.
1409
	 */
1410
	newId = heap_insert(resultRelationDesc, tuple,
1411 1412
						estate->es_snapshot->curcid,
						true, true);
1413

1414
	IncrAppended();
1415 1416
	(estate->es_processed)++;
	estate->es_lastoid = newId;
T
Tom Lane 已提交
1417
	setLastTid(&(tuple->t_self));
1418

B
Bruce Momjian 已提交
1419
	/*
1420
	 * insert index entries for tuple
1421
	 */
1422
	if (resultRelInfo->ri_NumIndices > 0)
1423
		ExecInsertIndexTuples(slot, &(tuple->t_self), estate, false);
1424 1425

	/* AFTER ROW INSERT Triggers */
1426
	ExecARInsertTriggers(estate, resultRelInfo, tuple);
1427 1428 1429 1430 1431

	/* Process RETURNING if present */
	if (resultRelInfo->ri_projectReturning)
		ExecProcessReturning(resultRelInfo->ri_projectReturning,
							 slot, planSlot, dest);
1432 1433 1434
}

/* ----------------------------------------------------------------
1435
 *		ExecDelete
1436
 *
1437 1438
 *		DELETE is like UPDATE, except that we delete the tuple and no
 *		index modifications are needed
1439 1440 1441
 * ----------------------------------------------------------------
 */
static void
1442 1443 1444
ExecDelete(ItemPointer tupleid,
		   TupleTableSlot *planSlot,
		   DestReceiver *dest,
1445
		   EState *estate)
1446
{
1447
	ResultRelInfo *resultRelInfo;
B
Bruce Momjian 已提交
1448
	Relation	resultRelationDesc;
B
Bruce Momjian 已提交
1449
	HTSU_Result result;
1450 1451
	ItemPointerData update_ctid;
	TransactionId update_xmax;
1452

B
Bruce Momjian 已提交
1453
	/*
1454
	 * get information on the (current) result relation
1455
	 */
1456 1457
	resultRelInfo = estate->es_result_relation_info;
	resultRelationDesc = resultRelInfo->ri_RelationDesc;
1458 1459

	/* BEFORE ROW DELETE Triggers */
1460
	if (resultRelInfo->ri_TrigDesc &&
B
Bruce Momjian 已提交
1461
		resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_DELETE] > 0)
1462
	{
1463
		bool		dodelete;
1464

1465
		dodelete = ExecBRDeleteTriggers(estate, resultRelInfo, tupleid,
1466
										estate->es_snapshot->curcid);
1467 1468 1469 1470 1471

		if (!dodelete)			/* "do nothing" */
			return;
	}

V
Vadim B. Mikheev 已提交
1472
	/*
B
Bruce Momjian 已提交
1473
	 * delete the tuple
1474
	 *
1475 1476
	 * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check that
	 * the row to be deleted is visible to that snapshot, and throw a can't-
B
Bruce Momjian 已提交
1477
	 * serialize error if not.	This is a special-case behavior needed for
1478
	 * referential integrity updates in serializable transactions.
1479
	 */
1480
ldelete:;
1481
	result = heap_delete(resultRelationDesc, tupleid,
1482
						 &update_ctid, &update_xmax,
1483 1484
						 estate->es_snapshot->curcid,
						 estate->es_crosscheck_snapshot,
B
Bruce Momjian 已提交
1485
						 true /* wait for commit */ );
V
Vadim B. Mikheev 已提交
1486 1487 1488
	switch (result)
	{
		case HeapTupleSelfUpdated:
1489
			/* already deleted by self; nothing to do */
V
Vadim B. Mikheev 已提交
1490 1491 1492 1493 1494 1495
			return;

		case HeapTupleMayBeUpdated:
			break;

		case HeapTupleUpdated:
1496
			if (IsXactIsoLevelSerializable)
1497 1498
				ereport(ERROR,
						(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
1499
						 errmsg("could not serialize access due to concurrent update")));
1500
			else if (!ItemPointerEquals(tupleid, &update_ctid))
1501
			{
1502
				TupleTableSlot *epqslot;
1503

1504 1505 1506
				epqslot = EvalPlanQual(estate,
									   resultRelInfo->ri_RangeTableIndex,
									   &update_ctid,
1507 1508
									   update_xmax,
									   estate->es_snapshot->curcid);
V
Vadim B. Mikheev 已提交
1509
				if (!TupIsNull(epqslot))
1510
				{
1511
					*tupleid = update_ctid;
1512 1513 1514
					goto ldelete;
				}
			}
1515
			/* tuple already deleted; nothing to do */
V
Vadim B. Mikheev 已提交
1516 1517 1518
			return;

		default:
1519
			elog(ERROR, "unrecognized heap_delete status: %u", result);
V
Vadim B. Mikheev 已提交
1520 1521
			return;
	}
1522 1523 1524 1525

	IncrDeleted();
	(estate->es_processed)++;

B
Bruce Momjian 已提交
1526
	/*
B
Bruce Momjian 已提交
1527
	 * Note: Normally one would think that we have to delete index tuples
1528
	 * associated with the heap tuple now...
1529
	 *
1530 1531 1532
	 * ... but in POSTGRES, we have no need to do this because VACUUM will
	 * take care of it later.  We can't delete index tuples immediately
	 * anyway, since the tuple is still visible to other transactions.
1533 1534 1535
	 */

	/* AFTER ROW DELETE Triggers */
1536
	ExecARDeleteTriggers(estate, resultRelInfo, tupleid);
1537 1538 1539 1540 1541

	/* Process RETURNING if present */
	if (resultRelInfo->ri_projectReturning)
	{
		/*
B
Bruce Momjian 已提交
1542 1543
		 * We have to put the target tuple into a slot, which means first we
		 * gotta fetch it.	We can use the trigger tuple slot.
1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563
		 */
		TupleTableSlot *slot = estate->es_trig_tuple_slot;
		HeapTupleData deltuple;
		Buffer		delbuffer;

		deltuple.t_self = *tupleid;
		if (!heap_fetch(resultRelationDesc, SnapshotAny,
						&deltuple, &delbuffer, false, NULL))
			elog(ERROR, "failed to fetch deleted tuple for DELETE RETURNING");

		if (slot->tts_tupleDescriptor != RelationGetDescr(resultRelationDesc))
			ExecSetSlotDescriptor(slot, RelationGetDescr(resultRelationDesc));
		ExecStoreTuple(&deltuple, slot, InvalidBuffer, false);

		ExecProcessReturning(resultRelInfo->ri_projectReturning,
							 slot, planSlot, dest);

		ExecClearTuple(slot);
		ReleaseBuffer(delbuffer);
	}
1564 1565 1566
}

/* ----------------------------------------------------------------
1567
 *		ExecUpdate
1568
 *
1569 1570 1571 1572
 *		note: we can't run UPDATE queries with transactions
 *		off because UPDATEs are actually INSERTs and our
 *		scan will mistakenly loop forever, updating the tuple
 *		it just inserted..	This should be fixed but until it
1573 1574
 *		is, we don't want to get stuck in an infinite loop
 *		which corrupts your database..
1575 1576 1577
 * ----------------------------------------------------------------
 */
static void
1578
ExecUpdate(TupleTableSlot *slot,
B
Bruce Momjian 已提交
1579
		   ItemPointer tupleid,
1580 1581
		   TupleTableSlot *planSlot,
		   DestReceiver *dest,
B
Bruce Momjian 已提交
1582
		   EState *estate)
1583
{
B
Bruce Momjian 已提交
1584
	HeapTuple	tuple;
1585
	ResultRelInfo *resultRelInfo;
B
Bruce Momjian 已提交
1586
	Relation	resultRelationDesc;
B
Bruce Momjian 已提交
1587
	HTSU_Result result;
1588 1589
	ItemPointerData update_ctid;
	TransactionId update_xmax;
1590

B
Bruce Momjian 已提交
1591
	/*
B
Bruce Momjian 已提交
1592
	 * abort the operation if not running transactions
1593 1594
	 */
	if (IsBootstrapProcessingMode())
1595
		elog(ERROR, "cannot UPDATE during bootstrap");
1596

B
Bruce Momjian 已提交
1597
	/*
B
Bruce Momjian 已提交
1598 1599
	 * get the heap tuple out of the tuple table slot, making sure we have a
	 * writable copy
1600
	 */
1601
	tuple = ExecMaterializeSlot(slot);
1602

B
Bruce Momjian 已提交
1603
	/*
1604
	 * get information on the (current) result relation
1605
	 */
1606 1607
	resultRelInfo = estate->es_result_relation_info;
	resultRelationDesc = resultRelInfo->ri_RelationDesc;
1608 1609

	/* BEFORE ROW UPDATE Triggers */
1610
	if (resultRelInfo->ri_TrigDesc &&
B
Bruce Momjian 已提交
1611
		resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_UPDATE] > 0)
1612
	{
1613
		HeapTuple	newtuple;
1614

1615
		newtuple = ExecBRUpdateTriggers(estate, resultRelInfo,
1616
										tupleid, tuple,
1617
										estate->es_snapshot->curcid);
1618 1619 1620 1621 1622 1623

		if (newtuple == NULL)	/* "do nothing" */
			return;

		if (newtuple != tuple)	/* modified by Trigger(s) */
		{
1624
			/*
1625 1626
			 * Put the modified tuple into a slot for convenience of routines
			 * below.  We assume the tuple was allocated in per-tuple memory
B
Bruce Momjian 已提交
1627 1628
			 * context, and therefore will go away by itself. The tuple table
			 * slot should not try to clear it.
1629
			 */
1630 1631 1632
			TupleTableSlot *newslot = estate->es_trig_tuple_slot;

			if (newslot->tts_tupleDescriptor != slot->tts_tupleDescriptor)
1633
				ExecSetSlotDescriptor(newslot, slot->tts_tupleDescriptor);
1634 1635
			ExecStoreTuple(newtuple, newslot, InvalidBuffer, false);
			slot = newslot;
1636
			tuple = newtuple;
1637 1638 1639
		}
	}

B
Bruce Momjian 已提交
1640
	/*
1641
	 * Check the constraints of the tuple
1642
	 *
1643 1644
	 * If we generate a new candidate tuple after EvalPlanQual testing, we
	 * must loop back here and recheck constraints.  (We don't need to redo
B
Bruce Momjian 已提交
1645 1646 1647
	 * triggers, however.  If there are any BEFORE triggers then trigger.c
	 * will have done heap_lock_tuple to lock the correct tuple, so there's no
	 * need to do them again.)
1648
	 */
1649
lreplace:;
1650
	if (resultRelationDesc->rd_att->constr)
1651
		ExecConstraints(resultRelInfo, slot, estate);
1652

V
Vadim B. Mikheev 已提交
1653
	/*
B
Bruce Momjian 已提交
1654
	 * replace the heap tuple
1655
	 *
1656 1657
	 * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check that
	 * the row to be updated is visible to that snapshot, and throw a can't-
B
Bruce Momjian 已提交
1658
	 * serialize error if not.	This is a special-case behavior needed for
1659
	 * referential integrity updates in serializable transactions.
1660
	 */
1661
	result = heap_update(resultRelationDesc, tupleid, tuple,
1662
						 &update_ctid, &update_xmax,
1663 1664
						 estate->es_snapshot->curcid,
						 estate->es_crosscheck_snapshot,
B
Bruce Momjian 已提交
1665
						 true /* wait for commit */ );
V
Vadim B. Mikheev 已提交
1666 1667 1668
	switch (result)
	{
		case HeapTupleSelfUpdated:
1669
			/* already deleted by self; nothing to do */
V
Vadim B. Mikheev 已提交
1670 1671 1672 1673 1674 1675
			return;

		case HeapTupleMayBeUpdated:
			break;

		case HeapTupleUpdated:
1676
			if (IsXactIsoLevelSerializable)
1677 1678
				ereport(ERROR,
						(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
1679
						 errmsg("could not serialize access due to concurrent update")));
1680
			else if (!ItemPointerEquals(tupleid, &update_ctid))
1681
			{
1682
				TupleTableSlot *epqslot;
1683

1684 1685 1686
				epqslot = EvalPlanQual(estate,
									   resultRelInfo->ri_RangeTableIndex,
									   &update_ctid,
1687 1688
									   update_xmax,
									   estate->es_snapshot->curcid);
V
Vadim B. Mikheev 已提交
1689
				if (!TupIsNull(epqslot))
1690
				{
1691
					*tupleid = update_ctid;
1692 1693
					slot = ExecFilterJunk(estate->es_junkFilter, epqslot);
					tuple = ExecMaterializeSlot(slot);
1694 1695 1696
					goto lreplace;
				}
			}
1697
			/* tuple already deleted; nothing to do */
V
Vadim B. Mikheev 已提交
1698 1699 1700
			return;

		default:
1701
			elog(ERROR, "unrecognized heap_update status: %u", result);
V
Vadim B. Mikheev 已提交
1702
			return;
1703 1704 1705 1706 1707
	}

	IncrReplaced();
	(estate->es_processed)++;

B
Bruce Momjian 已提交
1708
	/*
B
Bruce Momjian 已提交
1709 1710 1711
	 * Note: instead of having to update the old index tuples associated with
	 * the heap tuple, all we do is form and insert new index tuples. This is
	 * because UPDATEs are actually DELETEs and INSERTs, and index tuple
1712
	 * deletion is done later by VACUUM (see notes in ExecDelete).	All we do
1713
	 * here is insert new index tuples.  -cim 9/27/89
1714 1715
	 */

B
Bruce Momjian 已提交
1716
	/*
1717
	 * insert index entries for tuple
1718
	 *
B
Bruce Momjian 已提交
1719 1720
	 * Note: heap_update returns the tid (location) of the new tuple in the
	 * t_self field.
1721
	 */
1722
	if (resultRelInfo->ri_NumIndices > 0)
1723
		ExecInsertIndexTuples(slot, &(tuple->t_self), estate, false);
1724 1725

	/* AFTER ROW UPDATE Triggers */
1726
	ExecARUpdateTriggers(estate, resultRelInfo, tupleid, tuple);
1727 1728 1729 1730 1731

	/* Process RETURNING if present */
	if (resultRelInfo->ri_projectReturning)
		ExecProcessReturning(resultRelInfo->ri_projectReturning,
							 slot, planSlot, dest);
1732
}
V
Vadim B. Mikheev 已提交
1733

1734 1735 1736
/*
 * ExecRelCheck --- check that tuple meets constraints for result relation
 */
1737
static const char *
1738 1739
ExecRelCheck(ResultRelInfo *resultRelInfo,
			 TupleTableSlot *slot, EState *estate)
V
Vadim B. Mikheev 已提交
1740
{
1741
	Relation	rel = resultRelInfo->ri_RelationDesc;
1742 1743
	int			ncheck = rel->rd_att->constr->num_check;
	ConstrCheck *check = rel->rd_att->constr->check;
1744
	ExprContext *econtext;
1745
	MemoryContext oldContext;
1746 1747
	List	   *qual;
	int			i;
1748

1749 1750
	/*
	 * If first time through for this result relation, build expression
B
Bruce Momjian 已提交
1751 1752
	 * nodetrees for rel's constraint expressions.  Keep them in the per-query
	 * memory context so they'll survive throughout the query.
1753 1754 1755 1756 1757 1758 1759 1760
	 */
	if (resultRelInfo->ri_ConstraintExprs == NULL)
	{
		oldContext = MemoryContextSwitchTo(estate->es_query_cxt);
		resultRelInfo->ri_ConstraintExprs =
			(List **) palloc(ncheck * sizeof(List *));
		for (i = 0; i < ncheck; i++)
		{
1761 1762
			/* ExecQual wants implicit-AND form */
			qual = make_ands_implicit(stringToNode(check[i].ccbin));
1763
			resultRelInfo->ri_ConstraintExprs[i] = (List *)
1764
				ExecPrepareExpr((Expr *) qual, estate);
1765 1766 1767 1768
		}
		MemoryContextSwitchTo(oldContext);
	}

1769
	/*
B
Bruce Momjian 已提交
1770 1771
	 * We will use the EState's per-tuple context for evaluating constraint
	 * expressions (creating it if it's not already there).
1772
	 */
1773
	econtext = GetPerTupleExprContext(estate);
1774

1775 1776 1777 1778
	/* Arrange for econtext's scan tuple to be the tuple under test */
	econtext->ecxt_scantuple = slot;

	/* And evaluate the constraints */
1779 1780
	for (i = 0; i < ncheck; i++)
	{
1781
		qual = resultRelInfo->ri_ConstraintExprs[i];
1782

1783 1784
		/*
		 * NOTE: SQL92 specifies that a NULL result from a constraint
1785 1786
		 * expression is not to be treated as a failure.  Therefore, tell
		 * ExecQual to return TRUE for NULL.
1787
		 */
1788
		if (!ExecQual(qual, econtext, true))
1789
			return check[i].ccname;
1790 1791
	}

1792
	/* NULL result means no error */
1793
	return NULL;
V
Vadim B. Mikheev 已提交
1794 1795
}

1796
void
1797
ExecConstraints(ResultRelInfo *resultRelInfo,
1798
				TupleTableSlot *slot, EState *estate)
V
Vadim B. Mikheev 已提交
1799
{
1800
	Relation	rel = resultRelInfo->ri_RelationDesc;
1801 1802 1803
	TupleConstr *constr = rel->rd_att->constr;

	Assert(constr);
1804

1805
	if (constr->has_not_null)
V
Vadim B. Mikheev 已提交
1806
	{
1807
		int			natts = rel->rd_att->natts;
1808
		int			attrChk;
1809

1810
		for (attrChk = 1; attrChk <= natts; attrChk++)
1811
		{
B
Bruce Momjian 已提交
1812
			if (rel->rd_att->attrs[attrChk - 1]->attnotnull &&
1813
				slot_attisnull(slot, attrChk))
1814 1815
				ereport(ERROR,
						(errcode(ERRCODE_NOT_NULL_VIOLATION),
1816
						 errmsg("null value in column \"%s\" violates not-null constraint",
B
Bruce Momjian 已提交
1817
						NameStr(rel->rd_att->attrs[attrChk - 1]->attname))));
1818 1819 1820
		}
	}

1821
	if (constr->num_check > 0)
1822
	{
B
Bruce Momjian 已提交
1823
		const char *failed;
1824

1825
		if ((failed = ExecRelCheck(resultRelInfo, slot, estate)) != NULL)
1826 1827
			ereport(ERROR,
					(errcode(ERRCODE_CHECK_VIOLATION),
1828
					 errmsg("new row for relation \"%s\" violates check constraint \"%s\"",
1829
							RelationGetRelationName(rel), failed)));
1830
	}
V
Vadim B. Mikheev 已提交
1831
}
1832

1833 1834 1835 1836 1837 1838 1839 1840 1841
/*
 * ExecProcessReturning --- evaluate a RETURNING list and send to dest
 *
 * projectReturning: RETURNING projection info for current result rel
 * tupleSlot: slot holding tuple actually inserted/updated/deleted
 * planSlot: slot holding tuple returned by top plan node
 * dest: where to send the output
 */
static void
B
Bruce Momjian 已提交
1842
ExecProcessReturning(ProjectionInfo *projectReturning,
1843 1844 1845 1846
					 TupleTableSlot *tupleSlot,
					 TupleTableSlot *planSlot,
					 DestReceiver *dest)
{
B
Bruce Momjian 已提交
1847 1848
	ExprContext *econtext = projectReturning->pi_exprContext;
	TupleTableSlot *retSlot;
1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868

	/*
	 * Reset per-tuple memory context to free any expression evaluation
	 * storage allocated in the previous cycle.
	 */
	ResetExprContext(econtext);

	/* Make tuple and any needed join variables available to ExecProject */
	econtext->ecxt_scantuple = tupleSlot;
	econtext->ecxt_outertuple = planSlot;

	/* Compute the RETURNING expressions */
	retSlot = ExecProject(projectReturning, NULL);

	/* Send to dest */
	(*dest->receiveSlot) (retSlot, dest);

	ExecClearTuple(retSlot);
}

1869 1870 1871 1872 1873
/*
 * Check a modified tuple to see if we want to process its updated version
 * under READ COMMITTED rules.
 *
 * See backend/executor/README for some info about how this works.
 *
 *	estate - executor state data
 *	rti - rangetable index of table containing tuple
 *	*tid - t_ctid from the outdated tuple (ie, next updated version)
 *	priorXmax - t_xmax from the outdated tuple
 *	curCid - command ID of current command of my transaction
 *
 * *tid is also an output parameter: it's modified to hold the TID of the
 * latest version of the tuple (note this may be changed even on failure)
 *
 * Returns a slot containing the new candidate update/delete tuple, or
 * NULL if we determine we shouldn't process the row.
 */
TupleTableSlot *
EvalPlanQual(EState *estate, Index rti,
			 ItemPointer tid, TransactionId priorXmax, CommandId curCid)
{
	evalPlanQual *epq;
	EState	   *epqstate;
	Relation	relation;
	HeapTupleData tuple;
	HeapTuple	copyTuple = NULL;
	SnapshotData SnapshotDirty;
	bool		endNode;

	Assert(rti != 0);

	/*
	 * find relation containing target tuple: either the current result
	 * relation, or else one of the FOR UPDATE/SHARE row-mark relations
	 */
	if (estate->es_result_relation_info != NULL &&
		estate->es_result_relation_info->ri_RangeTableIndex == rti)
		relation = estate->es_result_relation_info->ri_RelationDesc;
	else
	{
		ListCell   *l;

		relation = NULL;
		foreach(l, estate->es_rowMarks)
		{
			if (((ExecRowMark *) lfirst(l))->rti == rti)
			{
				relation = ((ExecRowMark *) lfirst(l))->relation;
				break;
			}
		}
		if (relation == NULL)
			elog(ERROR, "could not find RowMark for RT index %u", rti);
	}

	/*
	 * fetch tid tuple
	 *
	 * Loop here to deal with updated or busy tuples
	 */
	InitDirtySnapshot(SnapshotDirty);
	tuple.t_self = *tid;
	for (;;)
	{
		Buffer		buffer;

		if (heap_fetch(relation, &SnapshotDirty, &tuple, &buffer, true, NULL))
		{
			/*
			 * If xmin isn't what we're expecting, the slot must have been
			 * recycled and reused for an unrelated tuple.	This implies that
			 * the latest version of the row was deleted, so we need do
			 * nothing.  (Should be safe to examine xmin without getting
			 * buffer's content lock, since xmin never changes in an existing
			 * tuple.)
			 */
			if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
									 priorXmax))
			{
				ReleaseBuffer(buffer);
				return NULL;
			}

			/* otherwise xmin should not be dirty... */
			if (TransactionIdIsValid(SnapshotDirty.xmin))
				elog(ERROR, "t_xmin is uncommitted in tuple to be updated");

			/*
			 * If tuple is being updated by other transaction then we have to
			 * wait for its commit/abort.
			 */
			if (TransactionIdIsValid(SnapshotDirty.xmax))
			{
				ReleaseBuffer(buffer);
				XactLockTableWait(SnapshotDirty.xmax);
				continue;		/* loop back to repeat heap_fetch */
			}

			/*
			 * If tuple was inserted by our own transaction, we have to check
			 * cmin against curCid: cmin >= curCid means our command cannot
			 * see the tuple, so we should ignore it.  Without this we are
			 * open to the "Halloween problem" of indefinitely re-updating the
			 * same tuple.	(We need not check cmax because
			 * HeapTupleSatisfiesDirty will consider a tuple deleted by our
			 * transaction dead, regardless of cmax.)  We just checked that
			 * priorXmax == xmin, so we can test that variable instead of
			 * doing HeapTupleHeaderGetXmin again.
			 */
			if (TransactionIdIsCurrentTransactionId(priorXmax) &&
				HeapTupleHeaderGetCmin(tuple.t_data) >= curCid)
			{
				ReleaseBuffer(buffer);
				return NULL;
			}

			/*
			 * We got tuple - now copy it for use by recheck query.
			 */
			copyTuple = heap_copytuple(&tuple);
			ReleaseBuffer(buffer);
			break;
		}

		/*
		 * If the referenced slot was actually empty, the latest version of
		 * the row must have been deleted, so we need do nothing.
		 */
		if (tuple.t_data == NULL)
		{
			ReleaseBuffer(buffer);
			return NULL;
		}

		/*
		 * As above, if xmin isn't what we're expecting, do nothing.
		 */
		if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
								 priorXmax))
		{
			ReleaseBuffer(buffer);
			return NULL;
		}

		/*
		 * If we get here, the tuple was found but failed SnapshotDirty.
		 * Assuming the xmin is either a committed xact or our own xact (as it
		 * certainly should be if we're trying to modify the tuple), this must
		 * mean that the row was updated or deleted by either a committed xact
		 * or our own xact.  If it was deleted, we can ignore it; if it was
		 * updated then chain up to the next version and repeat the whole
		 * test.
		 *
		 * As above, it should be safe to examine xmax and t_ctid without the
		 * buffer content lock, because they can't be changing.
		 */
		if (ItemPointerEquals(&tuple.t_self, &tuple.t_data->t_ctid))
		{
			/* deleted, so forget about it */
			ReleaseBuffer(buffer);
			return NULL;
		}

		/* updated, so look at the updated row */
		tuple.t_self = tuple.t_data->t_ctid;
		/* updated row should have xmin matching this xmax */
		priorXmax = HeapTupleHeaderGetXmax(tuple.t_data);
		ReleaseBuffer(buffer);
		/* loop back to fetch next in chain */
	}

	/*
	 * For UPDATE/DELETE we have to return tid of actual row we're executing
	 * PQ for.
	 */
	*tid = tuple.t_self;

	/*
	 * Need to run a recheck subquery.	Find or create a PQ stack entry.
	 */
	epq = estate->es_evalPlanQual;
	endNode = true;

	if (epq != NULL && epq->rti == 0)
	{
		/* Top PQ stack entry is idle, so re-use it */
		Assert(!(estate->es_useEvalPlan) && epq->next == NULL);
		epq->rti = rti;
		endNode = false;
	}

	/*
	 * If this is request for another RTE - Ra, - then we have to check wasn't
	 * PlanQual requested for Ra already and if so then Ra' row was updated
	 * again and we have to re-start old execution for Ra and forget all what
	 * we done after Ra was suspended. Cool? -:))
	 */
	if (epq != NULL && epq->rti != rti &&
		epq->estate->es_evTuple[rti - 1] != NULL)
	{
		do
		{
			evalPlanQual *oldepq;

			/* stop execution */
			EvalPlanQualStop(epq);
			/* pop previous PlanQual from the stack */
			oldepq = epq->next;
			Assert(oldepq && oldepq->rti != 0);
			/* push current PQ to freePQ stack */
			oldepq->free = epq;
			epq = oldepq;
			estate->es_evalPlanQual = epq;
		} while (epq->rti != rti);
	}

	/*
	 * If we are requested for another RTE then we have to suspend execution
	 * of current PlanQual and start execution for new one.
	 */
	if (epq == NULL || epq->rti != rti)
	{
		/* try to reuse plan used previously */
		evalPlanQual *newepq = (epq != NULL) ? epq->free : NULL;

		if (newepq == NULL)		/* first call or freePQ stack is empty */
		{
			newepq = (evalPlanQual *) palloc0(sizeof(evalPlanQual));
			newepq->free = NULL;
			newepq->estate = NULL;
			newepq->planstate = NULL;
		}
		else
		{
			/* recycle previously used PlanQual */
			Assert(newepq->estate == NULL);
			epq->free = NULL;
		}
		/* push current PQ to the stack */
		newepq->next = epq;
		epq = newepq;
		estate->es_evalPlanQual = epq;
		epq->rti = rti;
		endNode = false;
	}

	Assert(epq->rti == rti);

	/*
	 * Ok - we're requested for the same RTE.  Unfortunately we still have to
	 * end and restart execution of the plan, because ExecReScan wouldn't
	 * ensure that upper plan nodes would reset themselves.  We could make
	 * that work if insertion of the target tuple were integrated with the
	 * Param mechanism somehow, so that the upper plan nodes know that their
	 * children's outputs have changed.
	 *
	 * Note that the stack of free evalPlanQual nodes is quite useless at the
	 * moment, since it only saves us from pallocing/releasing the
	 * evalPlanQual nodes themselves.  But it will be useful once we implement
	 * ReScan instead of end/restart for re-using PlanQual nodes.
	 */
	if (endNode)
	{
		/* stop execution */
		EvalPlanQualStop(epq);
	}

	/*
	 * Initialize new recheck query.
	 *
	 * Note: if we were re-using PlanQual plans via ExecReScan, we'd need to
	 * instead copy down changeable state from the top plan (including
	 * es_result_relation_info, es_junkFilter) and reset locally changeable
	 * state in the epq (including es_param_exec_vals, es_evTupleNull).
	 */
	EvalPlanQualStart(epq, estate, epq->next);

	/*
	 * free old RTE' tuple, if any, and store target tuple where relation's
	 * scan node will see it
	 */
	epqstate = epq->estate;
	if (epqstate->es_evTuple[rti - 1] != NULL)
		heap_freetuple(epqstate->es_evTuple[rti - 1]);
	epqstate->es_evTuple[rti - 1] = copyTuple;

	return EvalPlanQualNext(estate);
}

B
Bruce Momjian 已提交
2158
/*
 * EvalPlanQualNext --- fetch the next tuple from the active recheck plan.
 *
 * When the current PlanQual level is exhausted, it is shut down and popped
 * off the stack; execution resumes with the suspended outer level, if any.
 * Returns NULL when the whole stack is exhausted.
 */
static TupleTableSlot *
EvalPlanQualNext(EState *estate)
{
	evalPlanQual *epq = estate->es_evalPlanQual;
	MemoryContext oldcontext;
	TupleTableSlot *slot;

	Assert(epq->rti != 0);

lpqnext:;
	/* run the recheck plan in its own per-query memory context */
	oldcontext = MemoryContextSwitchTo(epq->estate->es_query_cxt);
	slot = ExecProcNode(epq->planstate);
	MemoryContextSwitchTo(oldcontext);

	/*
	 * No more tuples for this PQ. Continue previous one.
	 */
	if (TupIsNull(slot))
	{
		evalPlanQual *oldepq;

		/* stop execution */
		EvalPlanQualStop(epq);
		/* pop old PQ from the stack */
		oldepq = epq->next;
		if (oldepq == NULL)
		{
			/* this is the first (oldest) PQ - mark as free */
			epq->rti = 0;
			estate->es_useEvalPlan = false;
			/* and continue Query execution */
			return NULL;
		}
		Assert(oldepq->rti != 0);
		/* push current PQ to freePQ stack */
		oldepq->free = epq;
		epq = oldepq;
		estate->es_evalPlanQual = epq;
		goto lpqnext;
	}

	return slot;
}
2201 2202 2203 2204

/*
 * EndEvalPlanQual --- shut down any active PlanQual rechecks.
 *
 * Unwinds the entire evalPlanQual stack, stopping each level and moving
 * its node onto the free list, leaving the topmost entry marked idle
 * (rti == 0) for possible later re-use.
 */
static void
EndEvalPlanQual(EState *estate)
{
	evalPlanQual *epq = estate->es_evalPlanQual;

	if (epq->rti == 0)			/* plans already shutdowned */
	{
		Assert(epq->next == NULL);
		return;
	}

	for (;;)
	{
		evalPlanQual *oldepq;

		/* stop execution */
		EvalPlanQualStop(epq);
		/* pop old PQ from the stack */
		oldepq = epq->next;
		if (oldepq == NULL)
		{
			/* this is the first (oldest) PQ - mark as free */
			epq->rti = 0;
			estate->es_useEvalPlan = false;
			break;
		}
		Assert(oldepq->rti != 0);
		/* push current PQ to freePQ stack */
		oldepq->free = epq;
		epq = oldepq;
		estate->es_evalPlanQual = epq;
	}
}

/*
 * Start execution of one level of PlanQual.
 *
 * This is a cut-down version of ExecutorStart(): we copy some state from
 * the top-level estate rather than initializing it fresh.
 */
static void
EvalPlanQualStart(evalPlanQual *epq, EState *estate, evalPlanQual *priorepq)
{
	EState	   *epqstate;
	int			rtsize;
	MemoryContext oldcontext;
	ListCell   *l;

	rtsize = list_length(estate->es_range_table);

	epq->estate = epqstate = CreateExecutorState();

	oldcontext = MemoryContextSwitchTo(epqstate->es_query_cxt);

	/*
	 * The epqstates share the top query's copy of unchanging state such as
	 * the snapshot, rangetable, result-rel info, and external Param info.
	 * They need their own copies of local state, including a tuple table,
	 * es_param_exec_vals, etc.
	 */
	epqstate->es_direction = ForwardScanDirection;
	epqstate->es_snapshot = estate->es_snapshot;
	epqstate->es_crosscheck_snapshot = estate->es_crosscheck_snapshot;
	epqstate->es_range_table = estate->es_range_table;
	epqstate->es_result_relations = estate->es_result_relations;
	epqstate->es_num_result_relations = estate->es_num_result_relations;
	epqstate->es_result_relation_info = estate->es_result_relation_info;
	epqstate->es_junkFilter = estate->es_junkFilter;
	epqstate->es_into_relation_descriptor = estate->es_into_relation_descriptor;
	epqstate->es_into_relation_use_wal = estate->es_into_relation_use_wal;
	epqstate->es_param_list_info = estate->es_param_list_info;
	if (estate->es_plannedstmt->nParamExec > 0)
		epqstate->es_param_exec_vals = (ParamExecData *)
			palloc0(estate->es_plannedstmt->nParamExec * sizeof(ParamExecData));
	epqstate->es_rowMarks = estate->es_rowMarks;
	epqstate->es_instrument = estate->es_instrument;
	epqstate->es_select_into = estate->es_select_into;
	epqstate->es_into_oids = estate->es_into_oids;
	epqstate->es_plannedstmt = estate->es_plannedstmt;

	/*
	 * Each epqstate must have its own es_evTupleNull state, but all the stack
	 * entries share es_evTuple state.	This allows sub-rechecks to inherit
	 * the value being examined by an outer recheck.
	 */
	epqstate->es_evTupleNull = (bool *) palloc0(rtsize * sizeof(bool));
	if (priorepq == NULL)
		/* first PQ stack entry */
		epqstate->es_evTuple = (HeapTuple *)
			palloc0(rtsize * sizeof(HeapTuple));
	else
		/* later stack entries share the same storage */
		epqstate->es_evTuple = priorepq->estate->es_evTuple;

	/*
	 * Create sub-tuple-table; we needn't redo the CountSlots work though.
	 */
	epqstate->es_tupleTable =
		ExecCreateTupleTable(estate->es_tupleTable->size);

	/*
	 * Initialize private state information for each SubPlan.  We must do
	 * this before running ExecInitNode on the main query tree, since
	 * ExecInitSubPlan expects to be able to find these entries.
	 */
	Assert(epqstate->es_subplanstates == NIL);
	foreach(l, estate->es_plannedstmt->subplans)
	{
		Plan   *subplan = (Plan *) lfirst(l);
		PlanState *subplanstate;

		subplanstate = ExecInitNode(subplan, epqstate, 0);

		epqstate->es_subplanstates = lappend(epqstate->es_subplanstates,
											 subplanstate);
	}

	/*
	 * Initialize the private state information for all the nodes in the query
	 * tree.  This opens files, allocates storage and leaves us ready to start
	 * processing tuples.
	 */
	epq->planstate = ExecInitNode(estate->es_plannedstmt->planTree, epqstate, 0);

	MemoryContextSwitchTo(oldcontext);
}

/*
 * End execution of one level of PlanQual.
 *
 * This is a cut-down version of ExecutorEnd(); basically we want to do most
 * of the normal cleanup, but *not* close result relations (which we are
 * just sharing from the outer query).
 */
static void
EvalPlanQualStop(evalPlanQual *epq)
{
	EState	   *epqstate = epq->estate;
	MemoryContext oldcontext;
	ListCell   *l;

	oldcontext = MemoryContextSwitchTo(epqstate->es_query_cxt);

	/* shut down the main plan tree first, then any subplans */
	ExecEndNode(epq->planstate);

	foreach(l, epqstate->es_subplanstates)
	{
		PlanState *subplanstate = (PlanState *) lfirst(l);

		ExecEndNode(subplanstate);
	}

	ExecDropTupleTable(epqstate->es_tupleTable, true);
	epqstate->es_tupleTable = NULL;

	/* release the target tuple this level was rechecking, if any */
	if (epqstate->es_evTuple[epq->rti - 1] != NULL)
	{
		heap_freetuple(epqstate->es_evTuple[epq->rti - 1]);
		epqstate->es_evTuple[epq->rti - 1] = NULL;
	}

	MemoryContextSwitchTo(oldcontext);

	FreeExecutorState(epqstate);

	epq->estate = NULL;
	epq->planstate = NULL;
}
2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397


/*
 * Support for SELECT INTO (a/k/a CREATE TABLE AS)
 *
 * We implement SELECT INTO by diverting SELECT's normal output with
 * a specialized DestReceiver type.
 *
 * TODO: remove some of the INTO-specific cruft from EState, and keep
 * it in the DestReceiver instead.
 */

/* DestReceiver subclass for SELECT INTO; built by CreateIntoRelDestReceiver */
typedef struct
{
	DestReceiver pub;			/* publicly-known function pointers */
	EState	   *estate;			/* EState we are working with */
} DR_intorel;

/*
 * OpenIntoRel --- actually create the SELECT INTO target relation
 *
 * This also replaces QueryDesc->dest with the special DestReceiver for
 * SELECT INTO.  We assume that the correct result tuple type has already
 * been placed in queryDesc->tupDesc.
 */
static void
OpenIntoRel(QueryDesc *queryDesc)
{
	IntoClause *into = queryDesc->plannedstmt->into;
	EState	   *estate = queryDesc->estate;
	Relation	intoRelationDesc;
	char	   *intoName;
	Oid			namespaceId;
	Oid			tablespaceId;
	Datum		reloptions;
	AclResult	aclresult;
	Oid			intoRelationId;
	TupleDesc	tupdesc;
	DR_intorel *myState;

	Assert(into);

	/*
	 * Check consistency of arguments
	 */
	if (into->onCommit != ONCOMMIT_NOOP && !into->rel->istemp)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
				 errmsg("ON COMMIT can only be used on temporary tables")));

	/*
	 * Find namespace to create in, check its permissions
	 */
	intoName = into->rel->relname;
	namespaceId = RangeVarGetCreationNamespace(into->rel);

	aclresult = pg_namespace_aclcheck(namespaceId, GetUserId(),
									  ACL_CREATE);
	if (aclresult != ACLCHECK_OK)
		aclcheck_error(aclresult, ACL_KIND_NAMESPACE,
					   get_namespace_name(namespaceId));

	/*
	 * Select tablespace to use.  If not specified, use default_tablespace
	 * (which may in turn default to database's default).
	 */
	if (into->tableSpaceName)
	{
		tablespaceId = get_tablespace_oid(into->tableSpaceName);
		if (!OidIsValid(tablespaceId))
			ereport(ERROR,
					(errcode(ERRCODE_UNDEFINED_OBJECT),
					 errmsg("tablespace \"%s\" does not exist",
							into->tableSpaceName)));
	}
	else
	{
		tablespaceId = GetDefaultTablespace();
		/* note InvalidOid is OK in this case */
	}

	/* Check permissions except when using the database's default space */
	if (OidIsValid(tablespaceId))
	{
		AclResult	aclresult;

		aclresult = pg_tablespace_aclcheck(tablespaceId, GetUserId(),
										   ACL_CREATE);

		if (aclresult != ACLCHECK_OK)
			aclcheck_error(aclresult, ACL_KIND_TABLESPACE,
						   get_tablespace_name(tablespaceId));
	}

	/* Parse and validate any reloptions */
	reloptions = transformRelOptions((Datum) 0,
									 into->options,
									 true,
									 false);
	(void) heap_reloptions(RELKIND_RELATION, reloptions, true);

	/* have to copy the actual tupdesc to get rid of any constraints */
	tupdesc = CreateTupleDescCopy(queryDesc->tupDesc);

	/* Now we can actually create the new relation */
	intoRelationId = heap_create_with_catalog(intoName,
											  namespaceId,
											  tablespaceId,
											  InvalidOid,
											  GetUserId(),
											  tupdesc,
											  RELKIND_RELATION,
											  false,
											  true,
											  0,
											  into->onCommit,
											  reloptions,
											  allowSystemTableMods);

	FreeTupleDesc(tupdesc);

	/*
	 * Advance command counter so that the newly-created relation's catalog
	 * tuples will be visible to heap_open.
	 */
	CommandCounterIncrement();

	/*
	 * If necessary, create a TOAST table for the INTO relation. Note that
	 * AlterTableCreateToastTable ends with CommandCounterIncrement(), so that
	 * the TOAST table will be visible for insertion.
	 */
	AlterTableCreateToastTable(intoRelationId);

	/*
	 * And open the constructed table for writing.
	 */
	intoRelationDesc = heap_open(intoRelationId, AccessExclusiveLock);

	/* use_wal off requires rd_targblock be initially invalid */
	Assert(intoRelationDesc->rd_targblock == InvalidBlockNumber);

	/*
	 * We can skip WAL-logging the insertions, unless PITR is in use.
	 *
	 * Note that for a non-temp INTO table, this is safe only because we know
	 * that the catalog changes above will have been WAL-logged, and so
	 * RecordTransactionCommit will think it needs to WAL-log the eventual
	 * transaction commit.	Else the commit might be lost, even though all the
	 * data is safely fsync'd ...
	 */
	estate->es_into_relation_use_wal = XLogArchivingActive();
	estate->es_into_relation_descriptor = intoRelationDesc;

	/*
	 * Now replace the query's DestReceiver with one for SELECT INTO
	 */
	queryDesc->dest = CreateDestReceiver(DestIntoRel, NULL);
	myState = (DR_intorel *) queryDesc->dest;
	Assert(myState->pub.mydest == DestIntoRel);
	myState->estate = estate;
}

/*
 * CloseIntoRel --- clean up SELECT INTO at ExecutorEnd time
 */
static void
CloseIntoRel(QueryDesc *queryDesc)
{
	EState	   *estate = queryDesc->estate;

	/* OpenIntoRel might never have gotten called */
	if (estate->es_into_relation_descriptor)
	{
		/*
		 * If we skipped using WAL, must heap_sync before commit: crash
		 * recovery could not replay the un-logged inserts otherwise.
		 */
		if (!estate->es_into_relation_use_wal)
			heap_sync(estate->es_into_relation_descriptor);

		/* close rel, but keep lock until commit */
		heap_close(estate->es_into_relation_descriptor, NoLock);

		estate->es_into_relation_descriptor = NULL;
	}
}

/*
 * CreateIntoRelDestReceiver -- create a suitable DestReceiver object
 *
 * Since CreateDestReceiver doesn't accept the parameters we'd need,
 * we just leave the private fields empty here.  OpenIntoRel will
 * fill them in.
 */
DestReceiver *
CreateIntoRelDestReceiver(void)
{
	DR_intorel *self = (DR_intorel *) palloc(sizeof(DR_intorel));

	self->pub.receiveSlot = intorel_receive;
	self->pub.rStartup = intorel_startup;
	self->pub.rShutdown = intorel_shutdown;
	self->pub.rDestroy = intorel_destroy;
	self->pub.mydest = DestIntoRel;

	self->estate = NULL;

	return (DestReceiver *) self;
}

/*
 * intorel_startup --- executor startup
 *
 * Nothing to do: all setup happens in OpenIntoRel.
 */
static void
intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
{
	/* no-op */
}

/*
 * intorel_receive --- receive one tuple
 *
 * Materializes a copy of the slot's tuple and inserts it into the INTO
 * relation, honoring the es_into_relation_use_wal decision made in
 * OpenIntoRel.
 */
static void
intorel_receive(TupleTableSlot *slot, DestReceiver *self)
{
	DR_intorel *myState = (DR_intorel *) self;
	EState	   *estate = myState->estate;
	HeapTuple	tuple;

	/* need a materialized copy to pass to heap_insert */
	tuple = ExecCopySlotTuple(slot);

	heap_insert(estate->es_into_relation_descriptor,
				tuple,
				estate->es_snapshot->curcid,
				estate->es_into_relation_use_wal,
				false);			/* never any point in using FSM */

	/* We know this is a newly created relation, so there are no indexes */

	heap_freetuple(tuple);

	IncrAppended();
}

/*
 * intorel_shutdown --- executor end
 *
 * Nothing to do: the target relation is closed by CloseIntoRel.
 */
static void
intorel_shutdown(DestReceiver *self)
{
	/* no-op */
}

/*
 * intorel_destroy --- release DestReceiver object
 *
 * Frees the DR_intorel allocated by CreateIntoRelDestReceiver.
 */
static void
intorel_destroy(DestReceiver *self)
{
	pfree(self);
}