/*-------------------------------------------------------------------------
 *
 * sequence.c
 *	  PostgreSQL sequences support code.
 *
 * Portions Copyright (c) 2005-2008, Greenplum inc.
 * Portions Copyright (c) 2012-Present Pivotal Software, Inc.
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/backend/commands/sequence.c,v 1.154 2008/07/13 20:45:47 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
17
#include "postgres.h"
18

19
#include "access/heapam.h"
20
#include "access/bufmask.h"
21 22
#include "access/transam.h"
#include "access/xact.h"
23
#include "access/xlogutils.h"
24
#include "catalog/dependency.h"
25
#include "catalog/heap.h"
26
#include "catalog/namespace.h"
27
#include "catalog/pg_type.h"
28
#include "commands/defrem.h"
29
#include "commands/sequence.h"
30
#include "commands/tablecmds.h"
B
Bruce Momjian 已提交
31
#include "miscadmin.h"
32
#include "storage/smgr.h"               /* RelationCloseSmgr -> smgrclose */
33
#include "nodes/makefuncs.h"
34 35
#include "storage/bufmgr.h"
#include "storage/lmgr.h"
36
#include "storage/proc.h"
37
#include "utils/acl.h"
B
Bruce Momjian 已提交
38
#include "utils/builtins.h"
39
#include "utils/formatting.h"
40
#include "utils/lsyscache.h"
41
#include "utils/resowner.h"
42
#include "utils/syscache.h"
43

44
#include "cdb/cdbdisp_query.h"
H
Heikki Linnakangas 已提交
45
#include "cdb/cdbdoublylinked.h"
46 47 48 49 50 51 52 53 54
#include "cdb/cdbsrlz.h"
#include "cdb/cdbvars.h"
#include "cdb/cdbmotion.h"
#include "cdb/ml_ipc.h"

#include "cdb/cdbpersistentfilesysobj.h"

#include "postmaster/seqserver.h"

55

V
Vadim B. Mikheev 已提交
56
/*
 * We don't want to log each fetching of a value from a sequence,
 * so we pre-log a few fetches in advance. In the event of
 * crash we can lose (skip over) as many values as we pre-logged.
 */
#define SEQ_LOG_VALS	32
62

63 64 65 66 67
/*
 * The "special area" of a sequence's buffer page looks like this.
 */
#define SEQ_MAGIC	  0x1717

68 69
typedef struct sequence_magic
{
70
	uint32		magic;
71
} sequence_magic;
72

73 74 75 76 77 78
/*
 * We store a SeqTable item for every sequence we have touched in the current
 * session.  This is needed to hold onto nextval/currval state.  (We can't
 * rely on the relcache, since it's only, well, a cache, and may decide to
 * discard entries.)
 *
B
Bruce Momjian 已提交
79
 * XXX We use linear search to find pre-existing SeqTable entries.	This is
80 81 82
 * good when only a small number of sequences are touched in a session, but
 * would suck with many different sequences.  Perhaps use a hashtable someday.
 */
83 84
typedef struct SeqTableData
{
85 86
	struct SeqTableData *next;	/* link to next SeqTable object */
	Oid			relid;			/* pg_class OID of this sequence */
87
	LocalTransactionId lxid;	/* xact in which we last did a seq op */
88
	bool		last_valid;		/* do we have a valid "last" value? */
89 90 91 92
	int64		last;			/* value last returned by nextval */
	int64		cached;			/* last value already cached for nextval */
	/* if last != cached, we have not used up all the cached values */
	int64		increment;		/* copy of sequence's increment field */
93
	/* note that increment is zero until we first do read_seq_tuple() */
94
} SeqTableData;
95 96 97

typedef SeqTableData *SeqTable;

98
static SeqTable seqtab = NULL;	/* Head of list of SeqTable items */
99

100 101 102 103 104
/*
 * last_used_seq is updated by nextval() to point to the last used
 * sequence.
 */
static SeqTableData *last_used_seq = NULL;
105

106
static int64 nextval_internal(Oid relid);
107
static Relation open_share_lock(SeqTable seq);
108
static void init_sequence(Oid relid, SeqTable *p_elm, Relation *p_rel);
109 110
static Form_pg_sequence read_seq_tuple(SeqTable elm, Relation rel,
			   Buffer *buf, HeapTuple seqtuple);
111
static void init_params(List *options, bool isInit,
B
Bruce Momjian 已提交
112
			Form_pg_sequence new, List **owned_by);
113
static void do_setval(Oid relid, int64 next, bool iscalled);
114 115
static void process_owned_by(Relation seqrel, List *owned_by);

116
static void
117 118
cdb_sequence_nextval(SeqTable elm,
					 Relation   seqrel,
119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354
                     int64     *plast,
                     int64     *pcached,
                     int64     *pincrement,
                     bool      *seq_overflow);
static void
cdb_sequence_nextval_proxy(Relation seqrel,
                           int64   *plast,
                           int64   *pcached,
                           int64   *pincrement,
                           bool    *poverflow);

/*
 * Hash key for the sequence persistent-info cache: the sequence's
 * relfilenode.
 */
typedef struct SequencePersistentInfoCacheEntryKey
{
	RelFileNode				relFileNode;
} SequencePersistentInfoCacheEntryKey;

/*
 * One cached entry: the persistent TID and serial number that
 * Sequence_FetchGpRelationNodeForXLog() obtained for a sequence, plus the
 * links that chain the entry into the LRU list.
 */
typedef struct SequencePersistentInfoCacheEntryData
{
	/* hash key; the hash table's keysize is sizeof() this member */
	SequencePersistentInfoCacheEntryKey	key;

	/* cached persistent TID for the sequence */
	ItemPointerData		persistentTid;

	/* cached persistent serial number for the sequence */
	int64				persistentSerialNum;

	/* membership in sequencePersistentInfoCacheLruListHead */
	DoubleLinks			lruLinks;

} SequencePersistentInfoCacheEntryData;
typedef SequencePersistentInfoCacheEntryData *SequencePersistentInfoCacheEntry;

/*
 * Cache of persistent information keyed by relfilenode.  NULL until
 * Sequence_PersistentInfoCacheTableInit() has created it; the check and
 * add routines create it on first use.
 */
static HTAB *sequencePersistentInfoCacheTable = NULL;

/* LRU ordering of the cache entries; new and re-used entries go first. */
static DoublyLinkedHead	sequencePersistentInfoCacheLruListHead;

/* Number of entries currently in the cache. */
static int sequencePersistentInfoCacheLruCount = 0;

/* Once the count exceeds this limit, the last entry on the LRU list is evicted. */
static int sequencePersistentInfoCacheLruLimit = 100;

/*
 * Sequence_PersistentInfoCacheTableInit
 *
 * Build the hash table that caches persistent information for sequences
 * (keyed by SequencePersistentInfoCacheEntryKey, hashed with tag_hash)
 * and initialize the head of the accompanying LRU list.
 */
static void
Sequence_PersistentInfoCacheTableInit(void)
{
	HASHCTL		hashCtl;

	/* Describe key/entry layout and the hash function to use. */
	MemSet(&hashCtl, 0, sizeof(hashCtl));
	hashCtl.hash = tag_hash;
	hashCtl.entrysize = sizeof(SequencePersistentInfoCacheEntryData);
	hashCtl.keysize = sizeof(SequencePersistentInfoCacheEntryKey);

	sequencePersistentInfoCacheTable =
		hash_create("Sequence Persistent Info",
					10,
					&hashCtl,
					(HASH_ELEM | HASH_FUNCTION));

	DoublyLinkedHead_Init(&sequencePersistentInfoCacheLruListHead);
}

/*
 * Sequence_CheckPersistentInfoCache
 *
 * Look up the cached persistent information for the given relfilenode.
 *
 * On a hit, copies the cached TID and serial number into *persistentTid
 * and *persistentSerialNum, moves the entry to the front of the LRU list,
 * and returns true.  On a miss, returns false and leaves the output
 * arguments untouched.  The cache is created on first use.
 */
static bool
Sequence_CheckPersistentInfoCache(RelFileNode *relFileNode,
								  ItemPointer persistentTid,
								  int64 *persistentSerialNum)
{
	SequencePersistentInfoCacheEntryKey lookupKey;
	SequencePersistentInfoCacheEntry cached;
	bool		hit;

	if (sequencePersistentInfoCacheTable == NULL)
		Sequence_PersistentInfoCacheTableInit();

	/* Zero the whole key before filling it in, then probe the table. */
	MemSet(&lookupKey, 0, sizeof(SequencePersistentInfoCacheEntryKey));
	lookupKey.relFileNode = *relFileNode;

	cached = (SequencePersistentInfoCacheEntry)
		hash_search(sequencePersistentInfoCacheTable,
					(void *) &lookupKey,
					HASH_FIND,
					&hit);

	if (hit)
	{
		*persistentTid = cached->persistentTid;
		*persistentSerialNum = cached->persistentSerialNum;

		/* LRU: unlink the entry and re-insert it at the head of the list. */
		DoubleLinks_Remove(
			offsetof(SequencePersistentInfoCacheEntryData, lruLinks),
			&sequencePersistentInfoCacheLruListHead,
			cached);
		DoublyLinkedHead_AddFirst(
			offsetof(SequencePersistentInfoCacheEntryData, lruLinks),
			&sequencePersistentInfoCacheLruListHead,
			cached);

		return true;
	}

	return false;
}

/*
 * Sequence_AddPersistentInfoCache
 *
 * Insert persistent information (TID and serial number) for the given
 * relfilenode into the cache and put the new entry at the front of the
 * LRU list.  The caller must not add a relfilenode that is already cached
 * (asserted).  If the insertion pushes the entry count above
 * sequencePersistentInfoCacheLruLimit, the entry at the tail of the LRU
 * list is evicted.  The cache is created on first use.
 */
static void
Sequence_AddPersistentInfoCache(RelFileNode *relFileNode,
								ItemPointer persistentTid,
								int64 persistentSerialNum)
{
	SequencePersistentInfoCacheEntryKey newKey;
	SequencePersistentInfoCacheEntry fresh;
	SequencePersistentInfoCacheEntry victim;
	bool		alreadyCached;

	if (sequencePersistentInfoCacheTable == NULL)
		Sequence_PersistentInfoCacheTableInit();

	MemSet(&newKey, 0, sizeof(SequencePersistentInfoCacheEntryKey));
	newKey.relFileNode = *relFileNode;

	fresh = (SequencePersistentInfoCacheEntry)
		hash_search(sequencePersistentInfoCacheTable,
					(void *) &newKey,
					HASH_ENTER,
					&alreadyCached);
	Assert (!alreadyCached);

	/* Fill in the payload of the newly created entry. */
	fresh->persistentTid = *persistentTid;
	fresh->persistentSerialNum = persistentSerialNum;

	/* LRU: the new entry becomes the most recently used one. */
	DoubleLinks_Init(&fresh->lruLinks);
	DoublyLinkedHead_AddFirst(
		offsetof(SequencePersistentInfoCacheEntryData, lruLinks),
		&sequencePersistentInfoCacheLruListHead,
		fresh);

	sequencePersistentInfoCacheLruCount++;

	/* Still within the limit: nothing to evict. */
	if (sequencePersistentInfoCacheLruCount <= sequencePersistentInfoCacheLruLimit)
		return;

	/* Over the limit: drop the entry at the tail of the LRU list. */
	victim = (SequencePersistentInfoCacheEntry)
		DoublyLinkedHead_Last(
			offsetof(SequencePersistentInfoCacheEntryData, lruLinks),
			&sequencePersistentInfoCacheLruListHead);
	Assert(victim != NULL);

	DoubleLinks_Remove(
		offsetof(SequencePersistentInfoCacheEntryData, lruLinks),
		&sequencePersistentInfoCacheLruListHead,
		victim);

	if (Debug_persistent_print)
		elog(Persistent_DebugPrintLevel(), 
			 "Removed cached persistent information for sequence %u/%u/%u -- serial number " INT64_FORMAT ", TID %s",
			 victim->key.relFileNode.spcNode,
			 victim->key.relFileNode.dbNode,
			 victim->key.relFileNode.relNode,
			 victim->persistentSerialNum,
			 ItemPointerToString(&victim->persistentTid));

	hash_search(sequencePersistentInfoCacheTable,
				(void *) &victim->key,
				HASH_REMOVE,
				NULL);

	sequencePersistentInfoCacheLruCount--;
}


/*
 * Sequence_FetchGpRelationNodeForXLog
 *
 * Make sure rel->rd_segfile0_relationnodeinfo carries the persistent TID
 * and serial number of the sequence, so that callers can put them into
 * the XLOG records they emit.
 *
 * Does nothing if the information is already marked present.  Otherwise
 * the values are taken from the persistent-info cache when available, or
 * found with PersistentFileSysObj_ScanForRelation() and then added to the
 * cache.  Raises ERROR if the scan finds nothing, or if the resulting TID
 * is zero while Persistent_BeforePersistenceWork() is false.
 */
static void
Sequence_FetchGpRelationNodeForXLog(Relation rel)
{
	if (rel->rd_segfile0_relationnodeinfo.isPresent)
		return;

	/*
	 * For better performance, we cache the persistent information
	 * for sequences with upper bound and use LRU...
	 */
	if (Sequence_CheckPersistentInfoCache(
								&rel->rd_node,
								&rel->rd_segfile0_relationnodeinfo.persistentTid,
								&rel->rd_segfile0_relationnodeinfo.persistentSerialNum))
	{
		if (Debug_persistent_print)
			elog(Persistent_DebugPrintLevel(), 
				 "Found cached persistent information for sequence %u/%u/%u -- serial number " INT64_FORMAT ", TID %s",
				 rel->rd_node.spcNode,
				 rel->rd_node.dbNode,
				 rel->rd_node.relNode,
				 rel->rd_segfile0_relationnodeinfo.persistentSerialNum,
				 ItemPointerToString(&rel->rd_segfile0_relationnodeinfo.persistentTid));
	} 
	else 
	{
		/* Cache miss: look the information up and remember it. */
		if (!PersistentFileSysObj_ScanForRelation(
												&rel->rd_node,
												/* segmentFileNum */ 0,
												&rel->rd_segfile0_relationnodeinfo.persistentTid,
												&rel->rd_segfile0_relationnodeinfo.persistentSerialNum))
		{
			elog(ERROR, "Could not find persistent information for sequence %u/%u/%u",
			     rel->rd_node.spcNode,
			     rel->rd_node.dbNode,
			     rel->rd_node.relNode);
		}

		Sequence_AddPersistentInfoCache(
								&rel->rd_node,
								&rel->rd_segfile0_relationnodeinfo.persistentTid,
								rel->rd_segfile0_relationnodeinfo.persistentSerialNum);

		if (Debug_persistent_print)
			elog(Persistent_DebugPrintLevel(), 
				 "Add cached persistent information for sequence %u/%u/%u -- serial number " INT64_FORMAT ", TID %s",
				 rel->rd_node.spcNode,
				 rel->rd_node.dbNode,
				 rel->rd_node.relNode,
				 rel->rd_segfile0_relationnodeinfo.persistentSerialNum,
				 ItemPointerToString(&rel->rd_segfile0_relationnodeinfo.persistentTid));
	}

	/* A zero TID is only acceptable before persistence work has begun. */
	if (!Persistent_BeforePersistenceWork() &&
		PersistentStore_IsZeroTid(&rel->rd_segfile0_relationnodeinfo.persistentTid))
	{	
		elog(ERROR, 
			 "Sequence_FetchGpRelationNodeForXLog has invalid TID (0,0) for relation %u/%u/%u '%s', serial number " INT64_FORMAT,
			 rel->rd_node.spcNode,
			 rel->rd_node.dbNode,
			 rel->rd_node.relNode,
			 NameStr(rel->rd_rel->relname),
			 rel->rd_segfile0_relationnodeinfo.persistentSerialNum);
	}

	rel->rd_segfile0_relationnodeinfo.isPresent = true;
}
369 370

/*
B
Bruce Momjian 已提交
371
 * DefineSequence
372
 *				Creates a new sequence relation
373 374
 */
void
375
DefineSequence(CreateSeqStmt *seq)
376
{
377 378
	MIRROREDLOCK_BUFMGR_DECLARE;

379
	FormData_pg_sequence new;
380
	List	   *owned_by;
381
	CreateStmt *stmt = makeNode(CreateStmt);
382
	Oid			seqoid;
383 384
	Relation	rel;
	Buffer		buf;
385
	Page		page;
386
	sequence_magic *sm;
387 388 389
	HeapTuple	tuple;
	TupleDesc	tupDesc;
	Datum		value[SEQ_COL_LASTCOL];
390
	bool		null[SEQ_COL_LASTCOL];
391
	int			i;
392
	NameData	name;
393

394 395
	bool shouldDispatch =  Gp_role == GP_ROLE_DISPATCH && !IsBootstrapProcessingMode();

396
	/* Check and set all option values */
397
	init_params(seq->options, true, &new, &owned_by);
398 399

	/*
400
	 * Create relation (and fill value[] and null[] for the tuple)
401 402 403
	 */
	stmt->tableElts = NIL;
	for (i = SEQ_COL_FIRSTCOL; i <= SEQ_COL_LASTCOL; i++)
404
	{
405
		ColumnDef  *coldef = makeNode(ColumnDef);
406

407 408
		coldef->inhcount = 0;
		coldef->is_local = true;
409
		coldef->is_not_null = true;
410 411
		coldef->raw_default = NULL;
		coldef->cooked_default = NULL;
412 413
		coldef->constraints = NIL;

414
		null[i - 1] = false;
415 416 417

		switch (i)
		{
418
			case SEQ_COL_NAME:
419
				coldef->typeName = makeTypeNameFromOid(NAMEOID, -1);
420
				coldef->colname = "sequence_name";
421
				namestrcpy(&name, seq->sequence->relname);
422
				value[i - 1] = NameGetDatum(&name);
423 424
				break;
			case SEQ_COL_LASTVAL:
425
				coldef->typeName = makeTypeNameFromOid(INT8OID, -1);
426
				coldef->colname = "last_value";
427
				value[i - 1] = Int64GetDatumFast(new.last_value);
428
				break;
429
			case SEQ_COL_STARTVAL:
430
				coldef->typeName = makeTypeNameFromOid(INT8OID, -1);
431 432 433
				coldef->colname = "start_value";
				value[i - 1] = Int64GetDatumFast(new.start_value);
				break;
434
			case SEQ_COL_INCBY:
435
				coldef->typeName = makeTypeNameFromOid(INT8OID, -1);
436
				coldef->colname = "increment_by";
437
				value[i - 1] = Int64GetDatumFast(new.increment_by);
438 439
				break;
			case SEQ_COL_MAXVALUE:
440
				coldef->typeName = makeTypeNameFromOid(INT8OID, -1);
441
				coldef->colname = "max_value";
442
				value[i - 1] = Int64GetDatumFast(new.max_value);
443 444
				break;
			case SEQ_COL_MINVALUE:
445
				coldef->typeName = makeTypeNameFromOid(INT8OID, -1);
446
				coldef->colname = "min_value";
447
				value[i - 1] = Int64GetDatumFast(new.min_value);
448 449
				break;
			case SEQ_COL_CACHE:
450
				coldef->typeName = makeTypeNameFromOid(INT8OID, -1);
451
				coldef->colname = "cache_value";
452
				value[i - 1] = Int64GetDatumFast(new.cache_value);
453
				break;
V
Vadim B. Mikheev 已提交
454
			case SEQ_COL_LOG:
455
				coldef->typeName = makeTypeNameFromOid(INT8OID, -1);
V
Vadim B. Mikheev 已提交
456
				coldef->colname = "log_cnt";
457
				value[i - 1] = Int64GetDatum((int64) 0);
V
Vadim B. Mikheev 已提交
458
				break;
459
			case SEQ_COL_CYCLE:
460
				coldef->typeName = makeTypeNameFromOid(BOOLOID, -1);
461
				coldef->colname = "is_cycled";
462
				value[i - 1] = BoolGetDatum(new.is_cycled);
463 464
				break;
			case SEQ_COL_CALLED:
465
				coldef->typeName = makeTypeNameFromOid(BOOLOID, -1);
466
				coldef->colname = "is_called";
467
				value[i - 1] = BoolGetDatum(false);
468
				break;
469 470 471 472
		}
		stmt->tableElts = lappend(stmt->tableElts, coldef);
	}

473 474
	stmt->relation = seq->sequence;
	stmt->inhRelations = NIL;
475
	stmt->constraints = NIL;
476 477
	stmt->inhOids = NIL;
	stmt->parentOidCount = 0;
B
Bruce Momjian 已提交
478
	stmt->options = list_make1(defWithOids(false));
479
	stmt->oncommit = ONCOMMIT_NOOP;
480
	stmt->tablespacename = NULL;
481 482
	stmt->relKind = RELKIND_SEQUENCE;
	stmt->ownerid = GetUserId();
483

484
	seqoid = DefineRelation(stmt, RELKIND_SEQUENCE, RELSTORAGE_HEAP, false);
485

486 487 488 489 490 491 492 493
	/*
	 * Open and lock the new sequence.  (This lock is redundant; an
	 * AccessExclusiveLock was acquired above by DefineRelation and
	 * won't be released until end of transaction.)
	 *
	 * CDB: Acquire lock on qDisp before dispatching to qExecs, so
	 * qDisp can detect and resolve any deadlocks.
	 */
494
	rel = heap_open(seqoid, AccessExclusiveLock);
495
	tupDesc = RelationGetDescr(rel);
496

497 498
	/* Initialize first page of relation with special magic number */

499 500 501
	// -------- MirroredLock ----------
	MIRROREDLOCK_BUFMGR_LOCK;
	
502
	buf = ReadBuffer(rel, P_NEW);
503 504
	Assert(BufferGetBlockNumber(buf) == 0);

505
	page = BufferGetPage(buf);
506

507 508
	LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);

509
	PageInit(page, BufferGetPageSize(buf), sizeof(sequence_magic));
510 511 512
	sm = (sequence_magic *) PageGetSpecialPointer(page);
	sm->magic = SEQ_MAGIC;

513 514 515 516 517
	LockBuffer(buf, BUFFER_LOCK_UNLOCK);
	
	MIRROREDLOCK_BUFMGR_UNLOCK;
	// -------- MirroredLock ----------
	
518 519 520
	/* hack: ensure heap_insert will insert on the just-created page */
	rel->rd_targblock = 0;

521
	/* Now form & insert sequence tuple */
522
	tuple = heap_form_tuple(tupDesc, value, null);
523
	simple_heap_insert(rel, tuple);
524

525 526
	Assert(ItemPointerGetOffsetNumber(&(tuple->t_self)) == FirstOffsetNumber);

527 528 529 530
	// Fetch gp_persistent_relation_node information that will be added to XLOG record.
	Assert(rel != NULL);
	Sequence_FetchGpRelationNodeForXLog(rel);

531
	/*
532 533
	 * Two special hacks here:
	 *
534 535
	 * 1. Since VACUUM does not process sequences, we have to force the tuple
	 * to have xmin = FrozenTransactionId now.	Otherwise it would become
B
Bruce Momjian 已提交
536
	 * invisible to SELECTs after 2G transactions.	It is okay to do this
537 538 539
	 * because if the current transaction aborts, no other xact will ever
	 * examine the sequence tuple anyway.
	 *
B
Bruce Momjian 已提交
540 541 542 543 544
	 * 2. Even though heap_insert emitted a WAL log record, we have to emit an
	 * XLOG_SEQ_LOG record too, since (a) the heap_insert record will not have
	 * the right xmin, and (b) REDO of the heap_insert record would re-init
	 * page and sequence magic number would be lost.  This means two log
	 * records instead of one :-(
545
	 */
546 547 548 549

	// -------- MirroredLock ----------
	MIRROREDLOCK_BUFMGR_LOCK;

550
	LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
551

552
	START_CRIT_SECTION();
553 554 555

	{
		/*
B
Bruce Momjian 已提交
556
		 * Note that the "tuple" structure is still just a local tuple record
557
		 * created by heap_form_tuple; its t_data pointer doesn't point at the
B
Bruce Momjian 已提交
558 559 560
		 * disk buffer.  To scribble on the disk buffer we need to fetch the
		 * item pointer.  But do the same to the local tuple, since that will
		 * be the source for the WAL log record, below.
561 562 563 564 565 566 567
		 */
		ItemId		itemId;
		Item		item;

		itemId = PageGetItemId((Page) page, FirstOffsetNumber);
		item = PageGetItem((Page) page, itemId);

568
		HeapTupleHeaderSetXmin((HeapTupleHeader) item, FrozenTransactionId);
569 570
		((HeapTupleHeader) item)->t_infomask |= HEAP_XMIN_COMMITTED;

571
		HeapTupleHeaderSetXmin(tuple->t_data, FrozenTransactionId);
572 573 574
		tuple->t_data->t_infomask |= HEAP_XMIN_COMMITTED;
	}

575 576
	MarkBufferDirty(buf);

577 578
	/* XLOG stuff */
	if (!rel->rd_istemp)
579
	{
580 581 582
		xl_seq_rec	xlrec;
		XLogRecPtr	recptr;
		XLogRecData rdata[2];
583 584

		xlrec.node = rel->rd_node;
585
		RelationGetPTInfo(rel, &xlrec.persistentTid, &xlrec.persistentSerialNum);
586

587 588
		rdata[0].data = (char *) &xlrec;
		rdata[0].len = sizeof(xl_seq_rec);
589
		rdata[0].buffer = InvalidBuffer;
590 591
		rdata[0].next = &(rdata[1]);

592
		rdata[1].data = (char *) tuple->t_data;
593
		rdata[1].len = tuple->t_len;
594
		rdata[1].buffer = InvalidBuffer;
595 596
		rdata[1].next = NULL;

597
		recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG, rdata);
598 599 600

		PageSetLSN(page, recptr);
	}
601

602
	END_CRIT_SECTION();
603

604 605
	UnlockReleaseBuffer(buf);

606 607 608
	MIRROREDLOCK_BUFMGR_UNLOCK;
	// -------- MirroredLock ----------

609 610 611 612
	/* process OWNED BY if given */
	if (owned_by)
		process_owned_by(rel, owned_by);

613
	heap_close(rel, NoLock);
614 615 616 617 618

	
	/* Dispatch to segments */
	if (shouldDispatch)
	{
619 620 621 622
		CdbDispatchUtilityStatement((Node *) seq,
									DF_CANCEL_ON_ERROR|
									DF_WITH_SNAPSHOT|
									DF_NEED_TWO_PHASE,
623
									GetAssignedOidsForDispatch(),
624
									NULL);
625
	}
626 627
}

B
Bruce Momjian 已提交
628 629 630
/*
 * AlterSequence
 *
631
 * Modify the definition of a sequence relation
B
Bruce Momjian 已提交
632 633
 */
void
634
AlterSequence(AlterSeqStmt *stmt)
B
Bruce Momjian 已提交
635
{
636
	Oid			relid;
637 638 639 640 641 642 643 644 645 646 647 648

	/* find sequence */
	relid = RangeVarGetRelid(stmt->sequence, false);

	/* allow ALTER to sequence owner only */
	/* if you change this, see also callers of AlterSequenceInternal! */
	if (!pg_class_ownercheck(relid, GetUserId()))
		aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_CLASS,
					   stmt->sequence->relname);

	/* do the work */
	AlterSequenceInternal(relid, stmt->options);
649 650 651 652 653 654 655 656

	if (Gp_role == GP_ROLE_DISPATCH)
		CdbDispatchUtilityStatement((Node *) stmt,
									DF_CANCEL_ON_ERROR|
									DF_WITH_SNAPSHOT|
									DF_NEED_TWO_PHASE,
									NIL,
									NULL);
657 658 659 660 661 662 663 664 665 666
}

/*
 * AlterSequenceInternal
 *
 * Same as AlterSequence except that the sequence is specified by OID
 * and we assume the caller already checked permissions.
 *
 * Reads the current sequence tuple, applies the given options to a copy,
 * writes the result back to the on-disk tuple inside a critical section
 * (WAL-logged for non-temp sequences), processes OWNED BY if given, and,
 * on the dispatcher for non-temp sequences, records the change through
 * MetaTrackUpdObject().
 */
void
AlterSequenceInternal(Oid relid, List *options)
{
	MIRROREDLOCK_BUFMGR_DECLARE;

	SeqTable	elm;
	Relation	seqrel;
	Buffer		buf;
	Form_pg_sequence seq;
	FormData_pg_sequence new;
	List	   *owned_by;
	HeapTupleData seqtuple;
	int64		save_increment;
	bool		bSeqIsTemp = false;
	int			numopts	   = 0;
	char	   *alter_subtype = "";		/* metadata tracking: kind of
										   redundant to say "role" */

	/* open and AccessShareLock sequence */
	init_sequence(relid, &elm, &seqrel);

	/* lock page' buffer and read tuple into new sequence structure */

	/* -------- MirroredLock ---------- */
	MIRROREDLOCK_BUFMGR_LOCK;

	/* hack to keep ALTER SEQUENCE OWNED BY from changing currval state */
	save_increment = elm->increment;

	seq = read_seq_tuple(elm, seqrel, &buf, &seqtuple);
	elm->increment = seq->increment_by;

	/* Copy old values of options into workspace */
	memcpy(&new, seq, sizeof(FormData_pg_sequence));

	/* Check and set new values */
	init_params(options, false, &new, &owned_by);

	if (owned_by)
	{
		/* Restore previous state of elm (assume nothing else changes) */
		elm->increment = save_increment;
	}
	else
	{
		/* Clear local cache so that we don't think we have cached numbers */
		/* Note that we do not change the currval() state */
		elm->cached = elm->last;
	}

	/* Fetch gp_persistent_relation_node information that will be added to XLOG record. */
	Assert(seqrel != NULL);
	Sequence_FetchGpRelationNodeForXLog(seqrel);

	/* Now okay to update the on-disk tuple */
	START_CRIT_SECTION();

	memcpy(seq, &new, sizeof(FormData_pg_sequence));

	MarkBufferDirty(buf);

	/* XLOG stuff */

	bSeqIsTemp = seqrel->rd_istemp;

	if (!bSeqIsTemp)
	{
		xl_seq_rec	xlrec;
		XLogRecPtr	recptr;
		XLogRecData rdata[2];
		Page		page = BufferGetPage(buf);

		xlrec.node = seqrel->rd_node;
		RelationGetPTInfo(seqrel, &xlrec.persistentTid, &xlrec.persistentSerialNum);

		rdata[0].data = (char *) &xlrec;
		rdata[0].len = sizeof(xl_seq_rec);
		rdata[0].buffer = InvalidBuffer;
		rdata[0].next = &(rdata[1]);

		rdata[1].data = (char *) seqtuple.t_data;
		rdata[1].len = seqtuple.t_len;
		rdata[1].buffer = InvalidBuffer;
		rdata[1].next = NULL;

		recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG, rdata);

		PageSetLSN(page, recptr);
	}

	END_CRIT_SECTION();

	UnlockReleaseBuffer(buf);

	MIRROREDLOCK_BUFMGR_UNLOCK;
	/* -------- MirroredLock ---------- */

	/* process OWNED BY if given */
	if (owned_by)
		process_owned_by(seqrel, owned_by);

	relation_close(seqrel, NoLock);

	/*
	 * Work out the "subtype" string for metadata tracking: the option count
	 * when there are zero or several options, otherwise the upper-cased name
	 * of the single option.
	 */
	numopts = list_length(options);

	if (numopts > 1)
	{
		char allopts[NAMEDATALEN];

		/* bounded write: never overrun the fixed-size buffer */
		snprintf(allopts, sizeof(allopts), "%d OPTIONS", numopts);

		alter_subtype = pstrdup(allopts);
	}
	else if (0 == numopts)
	{
		alter_subtype = "0 OPTIONS";
	}
	else if ((Gp_role == GP_ROLE_DISPATCH) && (!bSeqIsTemp))
	{
		ListCell		*option = list_head(options);
		DefElem			*defel	= (DefElem *) lfirst(option);
		char			*tempo	= NULL;

		alter_subtype = defel->defname;
		if (0 == strcmp(alter_subtype, "owned_by"))
			alter_subtype = "OWNED BY";

		tempo = str_toupper(alter_subtype, strlen(alter_subtype));

		alter_subtype = tempo;
	}

	if (Gp_role == GP_ROLE_DISPATCH && !bSeqIsTemp)
	{
		/* MPP-6929: metadata tracking */
		MetaTrackUpdObject(RelationRelationId,
						   relid,
						   GetUserId(),
						   "ALTER", alter_subtype);
	}
}

807

808 809 810 811 812
/*
 * Note: nextval with a text argument is no longer exported as a pg_proc
 * entry, but we keep it around to ease porting of C code that may have
 * called the function directly.
 */
813 814
Datum
nextval(PG_FUNCTION_ARGS)
815
{
816
	text	   *seqin = PG_GETARG_TEXT_P(0);
817
	RangeVar   *sequence;
818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836
	Oid			relid;

	sequence = makeRangeVarFromNameList(textToQualifiedNameList(seqin));
	relid = RangeVarGetRelid(sequence, false);

	PG_RETURN_INT64(nextval_internal(relid));
}

Datum
nextval_oid(PG_FUNCTION_ARGS)
{
	Oid			relid = PG_GETARG_OID(0);

	PG_RETURN_INT64(nextval_internal(relid));
}

/*
 * nextval_internal
 *
 * Return the next value of the sequence identified by relid.
 *
 * If the session still holds cached values for the sequence
 * (elm->last != elm->cached), the next one is handed out without touching
 * the sequence relation.  Otherwise the caller must have UPDATE privilege
 * on the sequence, and the sequence is advanced through
 * cdb_sequence_nextval_proxy() when Gp_role is GP_ROLE_EXECUTE, or through
 * cdb_sequence_nextval() in every other role.
 *
 * Raises ERROR on insufficient privilege or when the advancing routine
 * reports overflow.
 */
static int64
nextval_internal(Oid relid)
{
	SeqTable	elm;
	Relation	seqrel;
	bool is_overflow = false;

	/* open and AccessShareLock sequence */
	init_sequence(relid, &elm, &seqrel);

	if (elm->last != elm->cached)		/* some numbers were cached */
	{
		Assert(elm->last_valid);
		Assert(elm->increment != 0);
		elm->last += elm->increment;
		relation_close(seqrel, NoLock);
		last_used_seq = elm;
		return elm->last;
	}

	if (pg_class_aclcheck(elm->relid, GetUserId(), ACL_UPDATE) != ACLCHECK_OK)
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("permission denied for sequence %s",
						RelationGetRelationName(seqrel))));

	/* Update the sequence object. */
	if (Gp_role == GP_ROLE_EXECUTE)
		cdb_sequence_nextval_proxy(seqrel,
								   &elm->last,
								   &elm->cached,
								   &elm->increment,
								   &is_overflow);
	else
		cdb_sequence_nextval(elm,
							 seqrel,
							 &elm->last,
							 &elm->cached,
							 &elm->increment,
							 &is_overflow);
	last_used_seq = elm;

	if (is_overflow)
	{
		/*
		 * Report the error while seqrel is still open: the message reads the
		 * relation name out of seqrel, so the relation must not be closed
		 * first.  ereport(ERROR) does not return; the open relation
		 * reference is left to error cleanup, just as in the
		 * permission-denied path above.
		 */
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("nextval: reached %s value of sequence \"%s\" (" INT64_FORMAT ")",
                        elm->increment>0 ? "maximum":"minimum",
                        RelationGetRelationName(seqrel), elm->last)));
	}
	else
		elm->last_valid = true;

	relation_close(seqrel, NoLock);
	return elm->last;
}


894 895 896
/*
 * cdb_sequence_nextval
 *		Advance the sequence stored in seqrel and report the fetched range.
 *
 * The sequence tuple is read via read_seq_tuple(), which returns with the
 * page's buffer exclusively locked.  Up to cache_value values are fetched;
 * when a WAL record is due, SEQ_LOG_VALS further values are covered by that
 * record so that later calls can skip logging.  The tuple is then updated
 * in place inside a critical section.
 *
 * Output parameters:
 *	*plast		first value obtained by this call (the one to hand out)
 *	*pcached	last value fetched by this call
 *	*pincrement	the sequence's increment_by
 *	*poverflow	true if the MAXVALUE/MINVALUE limit was reached before any
 *				value could be obtained and the sequence does not cycle
 *
 * Overflow is reported through *poverflow rather than by raising an error
 * here; the caller decides how to report it.
 */
static void
cdb_sequence_nextval(SeqTable elm,
					 Relation seqrel,
					 int64 *plast,
					 int64 *pcached,
					 int64 *pincrement,
					 bool *poverflow)
{
	MIRROREDLOCK_BUFMGR_DECLARE;

	Buffer		buf;
	Page		page;
	HeapTupleData seqtuple;
	Form_pg_sequence seq;
	int64		incby,
				maxv,
				minv,
				cache,
				log,
				fetch,
				last;
	int64		result,
				next,
				rescnt = 0;
	bool		have_overflow = false;
	bool		logit = false;

	/* lock page's buffer and read tuple */

	/* -------- MirroredLock ---------- */
	MIRROREDLOCK_BUFMGR_LOCK;

	seq = read_seq_tuple(elm, seqrel, &buf, &seqtuple);
	page = BufferGetPage(buf);

	last = next = result = seq->last_value;
	incby = seq->increment_by;
	maxv = seq->max_value;
	minv = seq->min_value;
	fetch = cache = seq->cache_value;
	log = seq->log_cnt;

	if (!seq->is_called)
	{
		rescnt++;				/* return last_value if not is_called */
		fetch--;
	}

	/*
	 * Decide whether we should emit a WAL log record.	If so, force up the
	 * fetch count to grab SEQ_LOG_VALS more values than we actually need to
	 * cache.  (These will then be usable without logging.)
	 *
	 * If this is the first nextval after a checkpoint, we must force a new
	 * WAL record to be written anyway, else replay starting from the
	 * checkpoint would fail to advance the sequence past the logged values.
	 * In this case we may as well fetch extra values.
	 */
	if (log < fetch || !seq->is_called)
	{
		/* forced log to satisfy local demand for values */
		fetch = log = fetch + SEQ_LOG_VALS;
		logit = true;
	}
	else
	{
		XLogRecPtr	redoptr = GetRedoRecPtr();

		if (XLByteLE(PageGetLSN(page), redoptr))
		{
			/* last update of seq was before checkpoint */
			fetch = log = fetch + SEQ_LOG_VALS;
			logit = true;
		}
	}

	while (fetch)				/* try to fetch cache [+ log ] numbers */
	{
		/*
		 * Check MAXVALUE for ascending sequences and MINVALUE for descending
		 * sequences
		 */
		if (incby > 0)
		{
			/* ascending sequence */
			if ((maxv >= 0 && next > maxv - incby) ||
				(maxv < 0 && next + incby > maxv))
			{
				if (rescnt > 0)
					break;		/* stop fetching */
				if (!seq->is_cycled)
				{
					/* next stays put; overflow is flagged for the caller */
					have_overflow = true;
				}
				else
				{
					next = minv;
				}
			}
			else
				next += incby;
		}
		else
		{
			/* descending sequence */
			if ((minv < 0 && next < minv - incby) ||
				(minv >= 0 && next + incby < minv))
			{
				if (rescnt > 0)
					break;		/* stop fetching */
				if (!seq->is_cycled)
				{
					/* next stays put; overflow is flagged for the caller */
					have_overflow = true;
				}
				else
				{
					next = maxv;
				}
			}
			else
				next += incby;
		}
		fetch--;
		if (rescnt < cache)
		{
			log--;
			rescnt++;
			last = next;
			if (rescnt == 1)	/* if it's first result - */
				result = next;	/* it's what to return */
		}
	}

	log -= fetch;				/* adjust for any unfetched numbers */
	Assert(log >= 0);

	/* set results for caller */
	*poverflow = have_overflow; /* has the sequence overflown */
	*plast = result;			/* last returned number */
	*pcached = last;			/* last fetched number */
	*pincrement = incby;

	/*
	 * Fetch gp_persistent_relation_node information that will be added to
	 * XLOG record.  This is done before entering the critical section.
	 */
	Assert(seqrel != NULL);
	Sequence_FetchGpRelationNodeForXLog(seqrel);

	/* ready to change the on-disk (or really, in-buffer) tuple */
	START_CRIT_SECTION();

	/*
	 * We must mark the buffer dirty before doing XLogInsert(); see notes in
	 * SyncOneBuffer().  However, we don't apply the desired changes just yet.
	 * This looks like a violation of the buffer update protocol, but it is
	 * in fact safe because we hold exclusive lock on the buffer.  Any other
	 * process, including a checkpoint, that tries to examine the buffer
	 * contents will block until we release the lock, and then will see the
	 * final state that we install below.
	 */
	MarkBufferDirty(buf);

	/* XLOG stuff */
	if (logit && !seqrel->rd_istemp)
	{
		xl_seq_rec	xlrec;
		XLogRecPtr	recptr;
		XLogRecData rdata[2];

		/*
		 * We don't log the current state of the tuple, but rather the state
		 * as it would appear after "log" more fetches.  This lets us skip
		 * that many future WAL records, at the cost that we lose those
		 * sequence values if we crash.
		 */

		/* set values that will be saved in xlog */
		seq->last_value = next;
		seq->is_called = true;
		seq->log_cnt = 0;

		xlrec.node = seqrel->rd_node;
		RelationGetPTInfo(seqrel, &xlrec.persistentTid, &xlrec.persistentSerialNum);
		rdata[0].data = (char *) &xlrec;
		rdata[0].len = sizeof(xl_seq_rec);
		rdata[0].buffer = InvalidBuffer;
		rdata[0].next = &(rdata[1]);

		rdata[1].data = (char *) seqtuple.t_data;
		rdata[1].len = seqtuple.t_len;
		rdata[1].buffer = InvalidBuffer;
		rdata[1].next = NULL;

		recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG, rdata);

		PageSetLSN(page, recptr);

		/*
		 * need to update where we've inserted to into shmem so that the QD
		 * can flush it when necessary
		 */
		LWLockAcquire(SeqServerControlLock, LW_EXCLUSIVE);

		/* only ever move the recorded position forward */
		if (XLByteLT(seqServerCtl->lastXlogEntry, recptr))
		{
			seqServerCtl->lastXlogEntry.xlogid = recptr.xlogid;
			seqServerCtl->lastXlogEntry.xrecoff = recptr.xrecoff;
		}

		LWLockRelease(SeqServerControlLock);
	}

	/* Now update sequence tuple to the intended final state */
	seq->last_value = last;		/* last fetched number */
	seq->is_called = true;
	seq->log_cnt = log;			/* how much is logged */

	END_CRIT_SECTION();

	UnlockReleaseBuffer(buf);

	MIRROREDLOCK_BUFMGR_UNLOCK;
	/* -------- MirroredLock ---------- */

}								/* cdb_sequence_nextval */
1116

1117

1118
Datum
1119
currval_oid(PG_FUNCTION_ARGS)
1120
{
1121 1122
	Oid			relid = PG_GETARG_OID(0);
	int64		result;
1123
	SeqTable	elm;
1124
	Relation	seqrel;
1125

1126 1127 1128 1129 1130 1131 1132 1133
	/* For now, strictly forbidden on MPP. */
	if (Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_EXECUTE)
	{
		ereport(ERROR,
				(errcode(ERRCODE_GP_FEATURE_NOT_SUPPORTED),
				 errmsg("currval() not supported")));
	}

V
Vadim B. Mikheev 已提交
1134
	/* open and AccessShareLock sequence */
1135
	init_sequence(relid, &elm, &seqrel);
1136

1137 1138
	if (pg_class_aclcheck(elm->relid, GetUserId(), ACL_SELECT) != ACLCHECK_OK &&
		pg_class_aclcheck(elm->relid, GetUserId(), ACL_USAGE) != ACLCHECK_OK)
1139 1140
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
1141
				 errmsg("permission denied for sequence %s",
1142
						RelationGetRelationName(seqrel))));
1143

1144
	if (!elm->last_valid)
1145 1146
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1147
				 errmsg("currval of sequence \"%s\" is not yet defined in this session",
1148
						RelationGetRelationName(seqrel))));
1149 1150 1151

	result = elm->last;

1152 1153
	relation_close(seqrel, NoLock);

1154
	PG_RETURN_INT64(result);
1155 1156
}

1157 1158 1159 1160 1161 1162
Datum
lastval(PG_FUNCTION_ARGS)
{
	Relation	seqrel;
	int64		result;

1163 1164 1165 1166 1167 1168 1169 1170
	/* For now, strictly forbidden on MPP. */
	if (Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_EXECUTE)
	{
		ereport(ERROR,
				(errcode(ERRCODE_GP_FEATURE_NOT_SUPPORTED),
				 errmsg("lastval() not supported")));
	}

1171 1172 1173 1174 1175 1176
	if (last_used_seq == NULL)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("lastval is not yet defined in this session")));

	/* Someone may have dropped the sequence since the last nextval() */
1177 1178 1179
	if (!SearchSysCacheExists(RELOID,
							  ObjectIdGetDatum(last_used_seq->relid),
							  0, 0, 0))
1180 1181 1182 1183
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("lastval is not yet defined in this session")));

1184
	seqrel = open_share_lock(last_used_seq);
1185 1186

	/* nextval() must have already been called for this sequence */
1187
	Assert(last_used_seq->last_valid);
1188

1189 1190
	if (pg_class_aclcheck(last_used_seq->relid, GetUserId(), ACL_SELECT) != ACLCHECK_OK &&
		pg_class_aclcheck(last_used_seq->relid, GetUserId(), ACL_USAGE) != ACLCHECK_OK)
1191 1192 1193 1194 1195 1196 1197
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("permission denied for sequence %s",
						RelationGetRelationName(seqrel))));

	result = last_used_seq->last;
	relation_close(seqrel, NoLock);
1198

1199 1200 1201
	PG_RETURN_INT64(result);
}

B
Bruce Momjian 已提交
1202
/*
1203 1204 1205 1206
 * Main internal procedure that handles 2 & 3 arg forms of SETVAL.
 *
 * Note that the 3 arg version (which sets the is_called flag) is
 * only for use in pg_dump, and setting the is_called flag may not
B
Bruce Momjian 已提交
1207
 * work if multiple users are attached to the database and referencing
1208 1209
 * the sequence (unlikely if pg_dump is restoring it).
 *
B
Bruce Momjian 已提交
1210
 * It is necessary to have the 3 arg version so that pg_dump can
1211 1212 1213 1214
 * restore the state of a sequence exactly during data-only restores -
 * it is the only way to clear the is_called flag in an existing
 * sequence.
 */
B
Bruce Momjian 已提交
1215
static void
1216
do_setval(Oid relid, int64 next, bool iscalled)
M
 
Marc G. Fournier 已提交
1217
{
1218 1219
	MIRROREDLOCK_BUFMGR_DECLARE;

M
 
Marc G. Fournier 已提交
1220
	SeqTable	elm;
1221
	Relation	seqrel;
1222
	Buffer		buf;
1223
	HeapTupleData seqtuple;
1224
	Form_pg_sequence seq;
M
 
Marc G. Fournier 已提交
1225

1226 1227 1228 1229 1230 1231 1232
	if (Gp_role == GP_ROLE_EXECUTE)
	{
		ereport(ERROR,
				(errcode(ERRCODE_GP_FEATURE_NOT_SUPPORTED),
				 errmsg("setval() not supported in this context")));
	}

1233
	/* open and AccessShareLock sequence */
1234
	init_sequence(relid, &elm, &seqrel);
1235 1236

	if (pg_class_aclcheck(elm->relid, GetUserId(), ACL_UPDATE) != ACLCHECK_OK)
1237 1238
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
1239
				 errmsg("permission denied for sequence %s",
1240
						RelationGetRelationName(seqrel))));
M
 
Marc G. Fournier 已提交
1241

1242
	/* lock page' buffer and read tuple */
1243 1244 1245 1246
	
	// -------- MirroredLock ----------
	MIRROREDLOCK_BUFMGR_LOCK;
	
1247
	seq = read_seq_tuple(elm, seqrel, &buf, &seqtuple);
1248
	elm->increment = seq->increment_by;
M
 
Marc G. Fournier 已提交
1249

1250
	if ((next < seq->min_value) || (next > seq->max_value))
1251
	{
B
Bruce Momjian 已提交
1252 1253 1254 1255
		char		bufv[100],
					bufm[100],
					bufx[100];

1256 1257 1258
		snprintf(bufv, sizeof(bufv), INT64_FORMAT, next);
		snprintf(bufm, sizeof(bufm), INT64_FORMAT, seq->min_value);
		snprintf(bufx, sizeof(bufx), INT64_FORMAT, seq->max_value);
1259 1260
		ereport(ERROR,
				(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
1261
				 errmsg("setval: value %s is out of bounds for sequence \"%s\" (%s..%s)",
1262 1263
						bufv, RelationGetRelationName(seqrel),
						bufm, bufx)));
1264
	}
M
 
Marc G. Fournier 已提交
1265

1266 1267 1268 1269 1270 1271 1272 1273 1274
	/* Set the currval() state only if iscalled = true */
	if (iscalled)
	{
		elm->last = next;		/* last returned number */
		elm->last_valid = true;
	}

	/* In any case, forget any future cached numbers */
	elm->cached = elm->last;
M
 
Marc G. Fournier 已提交
1275

1276 1277 1278 1279
	// Fetch gp_persistent_relation_node information that will be added to XLOG record.
	Assert(seqrel != NULL);
	Sequence_FetchGpRelationNodeForXLog(seqrel);

1280
	/* ready to change the on-disk (or really, in-buffer) tuple */
1281
	START_CRIT_SECTION();
1282

1283 1284 1285 1286
	seq->last_value = next;		/* last fetched number */
	seq->is_called = iscalled;
	seq->log_cnt = 0;

1287 1288
	MarkBufferDirty(buf);

1289 1290
	/* XLOG stuff */
	if (!seqrel->rd_istemp)
V
Vadim B. Mikheev 已提交
1291 1292 1293
	{
		xl_seq_rec	xlrec;
		XLogRecPtr	recptr;
B
Bruce Momjian 已提交
1294
		XLogRecData rdata[2];
1295
		Page		page = BufferGetPage(buf);
V
Vadim B. Mikheev 已提交
1296

1297
		xlrec.node = seqrel->rd_node;
1298
		RelationGetPTInfo(seqrel, &xlrec.persistentTid, &xlrec.persistentSerialNum);
1299

B
Bruce Momjian 已提交
1300
		rdata[0].data = (char *) &xlrec;
1301
		rdata[0].len = sizeof(xl_seq_rec);
1302
		rdata[0].buffer = InvalidBuffer;
1303 1304
		rdata[0].next = &(rdata[1]);

1305 1306
		rdata[1].data = (char *) seqtuple.t_data;
		rdata[1].len = seqtuple.t_len;
1307
		rdata[1].buffer = InvalidBuffer;
1308 1309
		rdata[1].next = NULL;

1310
		recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG, rdata);
1311 1312

		PageSetLSN(page, recptr);
V
Vadim B. Mikheev 已提交
1313
	}
1314

1315
	END_CRIT_SECTION();
M
 
Marc G. Fournier 已提交
1316

1317
	UnlockReleaseBuffer(buf);
1318

1319 1320 1321
	MIRROREDLOCK_BUFMGR_UNLOCK;
	// -------- MirroredLock ----------

1322
	relation_close(seqrel, NoLock);
1323 1324
}

1325 1326 1327 1328
/*
 * Implement the 2 arg setval procedure.
 * See do_setval for discussion.
 */
1329
Datum
1330
setval_oid(PG_FUNCTION_ARGS)
1331
{
1332
	Oid			relid = PG_GETARG_OID(0);
1333
	int64		next = PG_GETARG_INT64(1);
1334

1335
	do_setval(relid, next, true);
1336

1337
	PG_RETURN_INT64(next);
1338 1339
}

1340 1341 1342 1343
/*
 * Implement the 3 arg setval procedure.
 * See do_setval for discussion.
 */
1344
Datum
1345
setval3_oid(PG_FUNCTION_ARGS)
1346
{
1347
	Oid			relid = PG_GETARG_OID(0);
1348
	int64		next = PG_GETARG_INT64(1);
1349 1350
	bool		iscalled = PG_GETARG_BOOL(2);

1351
	do_setval(relid, next, iscalled);
1352

1353
	PG_RETURN_INT64(next);
M
 
Marc G. Fournier 已提交
1354 1355
}

1356

1357
/*
1358 1359
 * Open the sequence and acquire AccessShareLock if needed
 *
1360
 * If we haven't touched the sequence already in this transaction,
B
Bruce Momjian 已提交
1361
 * we need to acquire AccessShareLock.	We arrange for the lock to
1362 1363 1364
 * be owned by the top transaction, so that we don't need to do it
 * more than once per xact.
 */
1365 1366
static Relation
open_share_lock(SeqTable seq)
1367
{
1368
	LocalTransactionId thislxid = MyProc->lxid;
1369

1370
	/* Get the lock if not already held in this xact */
1371
	if (seq->lxid != thislxid)
1372 1373 1374 1375 1376 1377 1378
	{
		ResourceOwner currentOwner;

		currentOwner = CurrentResourceOwner;
		PG_TRY();
		{
			CurrentResourceOwner = TopTransactionResourceOwner;
1379
			LockRelationOid(seq->relid, AccessShareLock);
1380 1381 1382 1383 1384 1385 1386 1387 1388 1389
		}
		PG_CATCH();
		{
			/* Ensure CurrentResourceOwner is restored on error */
			CurrentResourceOwner = currentOwner;
			PG_RE_THROW();
		}
		PG_END_TRY();
		CurrentResourceOwner = currentOwner;

1390
		/* Flag that we have a lock in the current xact */
1391
		seq->lxid = thislxid;
1392
	}
1393 1394 1395

	/* We now know we have AccessShareLock, and can safely open the rel */
	return relation_open(seq->relid, NoLock);
1396 1397
}

1398
/*
1399
 * Given a relation OID, open and lock the sequence.  p_elm and p_rel are
1400
 * output parameters.
1401 1402
 *
 * GPDB: If p_rel is NULL, the sequence relation is not opened or locked.
1403 1404
 */
static void
1405
init_sequence(Oid relid, SeqTable *p_elm, Relation *p_rel)
1406
{
B
Bruce Momjian 已提交
1407
	SeqTable	elm;
1408
	Relation	seqrel;
1409

1410 1411 1412 1413 1414 1415 1416
	/* Look to see if we already have a seqtable entry for relation */
	for (elm = seqtab; elm != NULL; elm = elm->next)
	{
		if (elm->relid == relid)
			break;
	}

1417
	/*
1418
	 * Allocate new seqtable entry if we didn't find one.
1419
	 *
B
Bruce Momjian 已提交
1420 1421 1422
	 * NOTE: seqtable entries remain in the list for the life of a backend. If
	 * the sequence itself is deleted then the entry becomes wasted memory,
	 * but it's small enough that this should not matter.
B
Bruce Momjian 已提交
1423
	 */
1424
	if (elm == NULL)
1425
	{
1426
		/*
B
Bruce Momjian 已提交
1427 1428
		 * Time to make a new seqtable entry.  These entries live as long as
		 * the backend does, so we use plain malloc for them.
1429 1430
		 */
		elm = (SeqTable) malloc(sizeof(SeqTableData));
T
Tom Lane 已提交
1431
		if (elm == NULL)
1432 1433 1434
			ereport(ERROR,
					(errcode(ERRCODE_OUT_OF_MEMORY),
					 errmsg("out of memory")));
1435
		elm->relid = relid;
1436
		elm->lxid = InvalidLocalTransactionId;
1437
		elm->last_valid = false;
1438 1439 1440
		elm->last = elm->cached = elm->increment = 0;
		elm->next = seqtab;
		seqtab = elm;
1441 1442
	}

1443 1444 1445
	/*
	 * Open the sequence relation.
	 */
1446 1447 1448
	if (p_rel)
	{
		seqrel = open_share_lock(elm);
1449

1450 1451 1452 1453 1454
		if (seqrel->rd_rel->relkind != RELKIND_SEQUENCE)
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("\"%s\" is not a sequence",
							RelationGetRelationName(seqrel))));
1455

1456 1457
		*p_rel = seqrel;
	}
1458
	*p_elm = elm;
1459 1460 1461
}


1462 1463 1464 1465 1466 1467 1468 1469 1470
/*
 * Given an opened sequence relation, lock the page buffer and find the tuple
 *
 * *buf receives the reference to the pinned-and-ex-locked buffer
 * *seqtuple receives the reference to the sequence tuple proper
 *		(this arg should point to a local variable of type HeapTupleData)
 *
 * Function's return value points to the data payload of the tuple
 */
1471
static Form_pg_sequence
1472
read_seq_tuple(SeqTable elm, Relation rel, Buffer *buf, HeapTuple seqtuple)
1473
{
1474
	Page		page;
1475 1476 1477
	ItemId		lp;
	sequence_magic *sm;
	Form_pg_sequence seq;
1478

1479 1480
	MIRROREDLOCK_BUFMGR_MUST_ALREADY_BE_HELD;

1481 1482 1483
	*buf = ReadBuffer(rel, 0);
	LockBuffer(*buf, BUFFER_LOCK_EXCLUSIVE);

1484
	page = BufferGetPage(*buf);
1485 1486 1487
	sm = (sequence_magic *) PageGetSpecialPointer(page);

	if (sm->magic != SEQ_MAGIC)
1488 1489
		elog(ERROR, "bad magic number in sequence \"%s\": %08X",
			 RelationGetRelationName(rel), sm->magic);
1490 1491

	lp = PageGetItemId(page, FirstOffsetNumber);
1492
	Assert(ItemIdIsNormal(lp));
1493 1494 1495 1496

	/* Note we currently only bother to set these two fields of *seqtuple */
	seqtuple->t_data = (HeapTupleHeader) PageGetItem((Page) page, lp);
	seqtuple->t_len = ItemIdGetLength(lp);
1497

1498 1499 1500 1501 1502 1503 1504 1505
	/*
	 * Previous releases of Postgres neglected to prevent SELECT FOR UPDATE
	 * on a sequence, which would leave a non-frozen XID in the sequence
	 * tuple's xmax, which eventually leads to clog access failures or worse.
	 * If we see this has happened, clean up after it.  We treat this like a
	 * hint bit update, ie, don't bother to WAL-log it, since we can certainly
	 * do this again if the update gets lost.
	 */
1506
	if (HeapTupleHeaderGetXmax(seqtuple->t_data) != InvalidTransactionId)
1507
	{
1508 1509 1510
		HeapTupleHeaderSetXmax(seqtuple->t_data, InvalidTransactionId);
		seqtuple->t_data->t_infomask &= ~HEAP_XMAX_COMMITTED;
		seqtuple->t_data->t_infomask |= HEAP_XMAX_INVALID;
1511
		MarkBufferDirtyHint(*buf, rel);
1512
	}
1513

1514
	seq = (Form_pg_sequence) GETSTRUCT(seqtuple);
1515

1516
	/* this is a handy place to update our copy of the increment */
1517 1518 1519
	elm->increment = seq->increment_by;

	return seq;
1520 1521
}

1522 1523
/*
 * init_params: process the options list of CREATE or ALTER SEQUENCE,
1524 1525
 * and store the values into appropriate fields of *new.  Also set
 * *owned_by to any OWNED BY option, or to NIL if there is none.
1526 1527 1528 1529
 *
 * If isInit is true, fill any unspecified options with default values;
 * otherwise, do not change existing options that aren't explicitly overridden.
 */
1530
static void
1531 1532
init_params(List *options, bool isInit,
			Form_pg_sequence new, List **owned_by)
1533
{
1534 1535
	DefElem    *start_value = NULL;
	DefElem    *restart_value = NULL;
1536 1537 1538 1539
	DefElem    *increment_by = NULL;
	DefElem    *max_value = NULL;
	DefElem    *min_value = NULL;
	DefElem    *cache_value = NULL;
1540
	DefElem    *is_cycled = NULL;
1541
	ListCell   *option;
1542

1543 1544
	*owned_by = NIL;

B
Bruce Momjian 已提交
1545
	foreach(option, options)
1546
	{
1547
		DefElem    *defel = (DefElem *) lfirst(option);
1548

1549
		if (strcmp(defel->defname, "increment") == 0)
1550 1551
		{
			if (increment_by)
1552 1553 1554
				ereport(ERROR,
						(errcode(ERRCODE_SYNTAX_ERROR),
						 errmsg("conflicting or redundant options")));
1555
			increment_by = defel;
1556
		}
1557 1558
		else if (strcmp(defel->defname, "start") == 0)
		{
1559
			if (start_value)
1560 1561 1562
				ereport(ERROR,
						(errcode(ERRCODE_SYNTAX_ERROR),
						 errmsg("conflicting or redundant options")));
1563
			start_value = defel;
1564 1565
		}
		else if (strcmp(defel->defname, "restart") == 0)
1566
		{
1567
			if (restart_value)
1568 1569 1570
				ereport(ERROR,
						(errcode(ERRCODE_SYNTAX_ERROR),
						 errmsg("conflicting or redundant options")));
1571
			restart_value = defel;
1572
		}
1573
		else if (strcmp(defel->defname, "maxvalue") == 0)
1574 1575
		{
			if (max_value)
1576 1577 1578
				ereport(ERROR,
						(errcode(ERRCODE_SYNTAX_ERROR),
						 errmsg("conflicting or redundant options")));
1579
			max_value = defel;
1580
		}
1581
		else if (strcmp(defel->defname, "minvalue") == 0)
1582 1583
		{
			if (min_value)
1584 1585 1586
				ereport(ERROR,
						(errcode(ERRCODE_SYNTAX_ERROR),
						 errmsg("conflicting or redundant options")));
1587
			min_value = defel;
1588
		}
1589
		else if (strcmp(defel->defname, "cache") == 0)
1590 1591
		{
			if (cache_value)
1592 1593 1594
				ereport(ERROR,
						(errcode(ERRCODE_SYNTAX_ERROR),
						 errmsg("conflicting or redundant options")));
1595
			cache_value = defel;
1596
		}
1597
		else if (strcmp(defel->defname, "cycle") == 0)
1598
		{
1599
			if (is_cycled)
1600 1601 1602
				ereport(ERROR,
						(errcode(ERRCODE_SYNTAX_ERROR),
						 errmsg("conflicting or redundant options")));
1603
			is_cycled = defel;
1604
		}
1605 1606 1607 1608 1609 1610 1611 1612
		else if (strcmp(defel->defname, "owned_by") == 0)
		{
			if (*owned_by)
				ereport(ERROR,
						(errcode(ERRCODE_SYNTAX_ERROR),
						 errmsg("conflicting or redundant options")));
			*owned_by = defGetQualifiedName(defel);
		}
1613
		else
1614
			elog(ERROR, "option \"%s\" not recognized",
1615 1616 1617
				 defel->defname);
	}

1618 1619 1620 1621 1622 1623 1624
	/*
	 * We must reset log_cnt when isInit or when changing any parameters
	 * that would affect future nextval allocations.
	 */
	if (isInit)
		new->log_cnt = 0;

B
Bruce Momjian 已提交
1625
	/* INCREMENT BY */
1626
	if (increment_by != NULL)
B
Bruce Momjian 已提交
1627 1628
	{
		new->increment_by = defGetInt64(increment_by);
1629 1630 1631
		if (new->increment_by == 0)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1632
					 errmsg("INCREMENT must not be zero")));
1633
		new->log_cnt = 0;
B
Bruce Momjian 已提交
1634
	}
1635 1636 1637 1638
	else if (isInit)
		new->increment_by = 1;

	/* CYCLE */
1639
	if (is_cycled != NULL)
1640 1641 1642
	{
		new->is_cycled = intVal(is_cycled->arg);
		Assert(new->is_cycled == false || new->is_cycled == true);
1643
		new->log_cnt = 0;
1644 1645 1646
	}
	else if (isInit)
		new->is_cycled = false;
1647

1648
	/* MAXVALUE (null arg means NO MAXVALUE) */
1649
	if (max_value != NULL && max_value->arg)
1650
	{
1651
		new->max_value = defGetInt64(max_value);
1652 1653
		new->log_cnt = 0;
	}
1654
	else if (isInit || max_value != NULL)
1655
	{
1656
		if (new->increment_by > 0)
B
Bruce Momjian 已提交
1657
			new->max_value = SEQ_MAXVALUE;		/* ascending seq */
1658
		else
B
Bruce Momjian 已提交
1659
			new->max_value = -1;	/* descending seq */
1660
		new->log_cnt = 0;
1661
	}
1662

1663
	/* MINVALUE (null arg means NO MINVALUE) */
1664
	if (min_value != NULL && min_value->arg)
1665
	{
1666
		new->min_value = defGetInt64(min_value);
1667 1668
		new->log_cnt = 0;
	}
1669
	else if (isInit || min_value != NULL)
1670
	{
1671
		if (new->increment_by > 0)
B
Bruce Momjian 已提交
1672
			new->min_value = 1; /* ascending seq */
1673
		else
B
Bruce Momjian 已提交
1674
			new->min_value = SEQ_MINVALUE;		/* descending seq */
1675
		new->log_cnt = 0;
1676
	}
1677

1678
	/* crosscheck min/max */
1679
	if (new->min_value >= new->max_value)
1680
	{
B
Bruce Momjian 已提交
1681 1682 1683
		char		bufm[100],
					bufx[100];

1684 1685
		snprintf(bufm, sizeof(bufm), INT64_FORMAT, new->min_value);
		snprintf(bufx, sizeof(bufx), INT64_FORMAT, new->max_value);
1686 1687 1688 1689
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("MINVALUE (%s) must be less than MAXVALUE (%s)",
						bufm, bufx)));
1690
	}
1691

B
Bruce Momjian 已提交
1692
	/* START WITH */
1693 1694
	if (start_value != NULL)
		new->start_value = defGetInt64(start_value);
1695
	else if (isInit)
1696
	{
1697
		if (new->increment_by > 0)
1698
			new->start_value = new->min_value;	/* ascending seq */
1699
		else
1700
			new->start_value = new->max_value;	/* descending seq */
1701
	}
1702

1703 1704
	/* crosscheck START */
	if (new->start_value < new->min_value)
1705
	{
1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728
		char		bufs[100],
					bufm[100];

		snprintf(bufs, sizeof(bufs), INT64_FORMAT, new->start_value);
		snprintf(bufm, sizeof(bufm), INT64_FORMAT, new->min_value);
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("START value (%s) cannot be less than MINVALUE (%s)",
						bufs, bufm)));
	}
	if (new->start_value > new->max_value)
	{
		char		bufs[100],
					bufm[100];

		snprintf(bufs, sizeof(bufs), INT64_FORMAT, new->start_value);
		snprintf(bufm, sizeof(bufm), INT64_FORMAT, new->max_value);
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
			  errmsg("START value (%s) cannot be greater than MAXVALUE (%s)",
					 bufs, bufm)));
	}

1729 1730 1731 1732 1733 1734 1735
	/* RESTART [WITH] */
	if (restart_value != NULL)
	{
		if (restart_value->arg != NULL)
			new->last_value = defGetInt64(restart_value);
		else
			new->last_value = new->start_value;
1736 1737 1738
		new->is_called = false;
		new->log_cnt = 1;
	}
1739
	else if (isInit)
1740
	{
1741
		new->last_value = new->start_value;
1742 1743
		new->is_called = false;
		new->log_cnt = 1;
1744
	}
1745

1746
	/* crosscheck RESTART (or current value, if changing MIN/MAX) */
1747
	if (new->last_value < new->min_value)
1748
	{
B
Bruce Momjian 已提交
1749 1750 1751
		char		bufs[100],
					bufm[100];

1752 1753
		snprintf(bufs, sizeof(bufs), INT64_FORMAT, new->last_value);
		snprintf(bufm, sizeof(bufm), INT64_FORMAT, new->min_value);
1754 1755
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1756
				 errmsg("RESTART value (%s) cannot be less than MINVALUE (%s)",
B
Bruce Momjian 已提交
1757
						bufs, bufm)));
1758
	}
1759
	if (new->last_value > new->max_value)
1760
	{
B
Bruce Momjian 已提交
1761 1762 1763
		char		bufs[100],
					bufm[100];

1764 1765
		snprintf(bufs, sizeof(bufs), INT64_FORMAT, new->last_value);
		snprintf(bufm, sizeof(bufm), INT64_FORMAT, new->max_value);
1766 1767
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1768
			  errmsg("RESTART value (%s) cannot be greater than MAXVALUE (%s)",
B
Bruce Momjian 已提交
1769
					 bufs, bufm)));
1770
	}
1771

B
Bruce Momjian 已提交
1772
	/* CACHE */
1773
	if (cache_value != NULL)
1774
	{
1775 1776 1777 1778
		new->cache_value = defGetInt64(cache_value);
		if (new->cache_value <= 0)
		{
			char		buf[100];
B
Bruce Momjian 已提交
1779

1780 1781 1782 1783 1784 1785
			snprintf(buf, sizeof(buf), INT64_FORMAT, new->cache_value);
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("CACHE (%s) must be greater than zero",
							buf)));
		}
1786
		new->log_cnt = 0;
1787
	}
1788 1789
	else if (isInit)
		new->cache_value = 1;
1790 1791
}

1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815
/*
 * process_owned_by
 *		Apply an OWNED BY clause of CREATE/ALTER SEQUENCE.
 *
 * "owned_by" is a list of names: either the single name "none", or a
 * (possibly qualified) table name followed by a column name.
 *
 * Ownership permissions on the sequence are already checked; when a new
 * owned-by link is being established we additionally require the target
 * to be a plain table with the same owner and the same namespace as the
 * sequence, and the named column to exist.
 *
 * Any existing AUTO dependency of the sequence is removed, and a new one
 * on the table column is recorded unless OWNED BY NONE was given.
 */
static void
process_owned_by(Relation seqrel, List *owned_by)
{
	Relation	owner_rel = NULL;
	AttrNumber	owner_attno = 0;
	int			name_count = list_length(owned_by);

	Assert(name_count > 0);

	if (name_count != 1)
	{
		List	   *table_names;
		char	   *column_name;
		RangeVar   *table_rv;

		/* The last list element is the column, everything before is the table */
		column_name = strVal(lfirst(list_tail(owned_by)));
		table_names = list_truncate(list_copy(owned_by), name_count - 1);

		/* Open and lock the table so it cannot go away underneath us */
		table_rv = makeRangeVarFromNameList(table_names);
		owner_rel = relation_openrv(table_rv, AccessShareLock);

		/* Only regular tables may own a sequence */
		if (owner_rel->rd_rel->relkind != RELKIND_RELATION)
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("referenced relation \"%s\" is not a table",
							RelationGetRelationName(owner_rel))));

		/* Owner must match ... */
		if (seqrel->rd_rel->relowner != owner_rel->rd_rel->relowner)
			ereport(ERROR,
					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
					 errmsg("sequence must have same owner as table it is linked to")));

		/* ... and so must the schema */
		if (RelationGetNamespace(seqrel) != RelationGetNamespace(owner_rel))
			ereport(ERROR,
					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
					 errmsg("sequence must be in same schema as table it is linked to")));

		/* Resolve the column name to an attribute number */
		owner_attno = get_attnum(RelationGetRelid(owner_rel), column_name);
		if (owner_attno == InvalidAttrNumber)
			ereport(ERROR,
					(errcode(ERRCODE_UNDEFINED_COLUMN),
					 errmsg("column \"%s\" of relation \"%s\" does not exist",
							column_name,
							RelationGetRelationName(owner_rel))));
	}
	else
	{
		/* A single name is only acceptable if it spells OWNED BY NONE */
		if (strcmp(strVal(linitial(owned_by)), "none") != 0)
			ereport(ERROR,
					(errcode(ERRCODE_SYNTAX_ERROR),
					 errmsg("invalid OWNED BY option"),
					 errhint("Specify OWNED BY table.column or OWNED BY NONE.")));
	}

	/*
	 * Update pg_depend: drop whatever AUTO dependency the sequence currently
	 * has, then record the new one if a table column was named.
	 */
	markSequenceUnowned(RelationGetRelid(seqrel));

	if (owner_rel != NULL)
	{
		ObjectAddress seq_addr;
		ObjectAddress col_addr;

		seq_addr.classId = RelationRelationId;
		seq_addr.objectId = RelationGetRelid(seqrel);
		seq_addr.objectSubId = 0;

		col_addr.classId = RelationRelationId;
		col_addr.objectId = RelationGetRelid(owner_rel);
		col_addr.objectSubId = owner_attno;

		recordDependencyOn(&seq_addr, &col_addr, DEPENDENCY_AUTO);

		/* Close the relcache entry but keep the lock until commit */
		relation_close(owner_rel, NoLock);
	}
}

V
Vadim B. Mikheev 已提交
1885

B
Bruce Momjian 已提交
1886
void
1887
seq_redo(XLogRecPtr beginLoc, XLogRecPtr lsn, XLogRecord *record)
V
Vadim B. Mikheev 已提交
1888
{
1889 1890
	MIRROREDLOCK_BUFMGR_DECLARE;

B
Bruce Momjian 已提交
1891 1892 1893 1894 1895 1896
	uint8		info = record->xl_info & ~XLR_INFO_MASK;
	Buffer		buffer;
	Page		page;
	char	   *item;
	Size		itemsz;
	xl_seq_rec *xlrec = (xl_seq_rec *) XLogRecGetData(record);
1897
	sequence_magic *sm;
V
Vadim B. Mikheev 已提交
1898

1899
	if (info != XLOG_SEQ_LOG)
1900
		elog(PANIC, "seq_redo: unknown op code %u", info);
1901 1902 1903 1904
	
	// -------- MirroredLock ----------
	MIRROREDLOCK_BUFMGR_LOCK;
	
1905
	buffer = XLogReadBuffer(xlrec->node, 0, true);
1906
	Assert(BufferIsValid(buffer));
V
Vadim B. Mikheev 已提交
1907 1908
	page = (Page) BufferGetPage(buffer);

1909 1910
	/* Always reinit the page and reinstall the magic number */
	/* See comments in DefineSequence */
1911 1912 1913
	PageInit((Page) page, BufferGetPageSize(buffer), sizeof(sequence_magic));
	sm = (sequence_magic *) PageGetSpecialPointer(page);
	sm->magic = SEQ_MAGIC;
V
Vadim B. Mikheev 已提交
1914

B
Bruce Momjian 已提交
1915
	item = (char *) xlrec + sizeof(xl_seq_rec);
1916
	itemsz = record->xl_len - sizeof(xl_seq_rec);
1917

B
Bruce Momjian 已提交
1918
	if (PageAddItem(page, (Item) item, itemsz,
1919
					FirstOffsetNumber, false, false) == InvalidOffsetNumber)
1920
		elog(PANIC, "seq_redo: failed to add item to page");
V
Vadim B. Mikheev 已提交
1921 1922

	PageSetLSN(page, lsn);
1923 1924
	MarkBufferDirty(buffer);
	UnlockReleaseBuffer(buffer);
1925 1926 1927 1928
	
	MIRROREDLOCK_BUFMGR_UNLOCK;
	// -------- MirroredLock ----------
	
V
Vadim B. Mikheev 已提交
1929 1930
}

B
Bruce Momjian 已提交
1931
void
1932
seq_desc(StringInfo buf, XLogRecPtr beginLoc, XLogRecord *record)
V
Vadim B. Mikheev 已提交
1933
{
1934 1935
	uint8		info = record->xl_info & ~XLR_INFO_MASK;
	char		*rec = XLogRecGetData(record);
B
Bruce Momjian 已提交
1936
	xl_seq_rec *xlrec = (xl_seq_rec *) rec;
V
Vadim B. Mikheev 已提交
1937 1938

	if (info == XLOG_SEQ_LOG)
1939
		appendStringInfo(buf, "log: ");
V
Vadim B. Mikheev 已提交
1940 1941
	else
	{
1942
		appendStringInfo(buf, "UNKNOWN");
V
Vadim B. Mikheev 已提交
1943 1944 1945
		return;
	}

1946
	appendStringInfo(buf, "rel %u/%u/%u",
B
Bruce Momjian 已提交
1947
			   xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode);
V
Vadim B. Mikheev 已提交
1948
}
1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032


/*
 * cdb_sequence_relation_init
 *		Initialize a pseudo relcache entry with just enough info to call
 *		bufmgr.
 *
 * seqrel		caller-provided RelationData to fill in; it is zeroed first
 * tablespaceid	tablespace part of the physical address
 * dbid			database part of the physical address
 * relid		used both as the relation's id and as its relNode
 * istemp		stored in rd_istemp
 *
 * A zeroed pg_class tuple is palloc'd for rd_rel, with relname set to
 * "pg_class.oid=<relid>".  It is released by cdb_sequence_relation_term().
 */
static void
cdb_sequence_relation_init(Relation seqrel,
						   Oid tablespaceid,
						   Oid dbid,
						   Oid relid,
						   bool istemp)
{
	/* See RelationBuildDesc in relcache.c */
	memset(seqrel, 0, sizeof(*seqrel));

	seqrel->rd_smgr = NULL;
	seqrel->rd_refcnt = 99;

	seqrel->rd_id = relid;
	seqrel->rd_istemp = istemp;

	/* Must use shared buffer pool so seqserver & QDs can see the data. */
	seqrel->rd_isLocalBuf = false;

	seqrel->rd_rel = (Form_pg_class) palloc0(CLASS_TUPLE_SIZE);

	/*
	 * Oid is unsigned, so it must be printed with %u; %d would show large
	 * OIDs as negative numbers.  Bound the write to the size of the name
	 * buffer rather than relying on the text being short.
	 */
	snprintf(seqrel->rd_rel->relname.data,
			 sizeof(seqrel->rd_rel->relname.data),
			 "pg_class.oid=%u", relid);

	/* as in RelationInitPhysicalAddr... */
	seqrel->rd_node.spcNode = tablespaceid;
	seqrel->rd_node.dbNode = dbid;
	seqrel->rd_node.relNode = relid;
}

/*
 * cdb_sequence_relation_term
 *		Tear down a pseudo relcache entry: close its smgr file and free the
 *		pg_class tuple attached to it, if there is one.
 */
static void
cdb_sequence_relation_term(Relation seqrel)
{
	/* Close the underlying file first */
	RelationCloseSmgr(seqrel);

	/* Then drop the pg_class tuple, if any */
	if (seqrel->rd_rel != NULL)
	{
		pfree(seqrel->rd_rel);
	}
}



/*
 * cdb_sequence_nextval_proxy
 *		CDB: forward a nextval request from a qExec to the sequence server.
 *
 * The request is sent over the descriptor returned by GetSeqServerFD() and
 * is tagged with the current gp_session_id.  The four output pointers are
 * passed straight through to sendSequenceRequest().
 */
void
cdb_sequence_nextval_proxy(Relation seqrel,
						   int64 *plast,
						   int64 *pcached,
						   int64 *pincrement,
						   bool *poverflow)
{
	sendSequenceRequest(GetSeqServerFD(), seqrel, gp_session_id,
						plast, pcached, pincrement, poverflow);
}


/*
 * cdb_sequence_nextval_server
 *		CDB: nextval entry point called by the sequence server.
 *
 * tablespaceid/dbid/relid identify the sequence's physical storage and
 * istemp is recorded in the pseudo relcache entry built for it.  Results
 * are returned through plast, pcached, pincrement and poverflow; the three
 * int64 outputs are zeroed up front, while *poverflow is not touched by
 * this function itself.
 */
void
cdb_sequence_nextval_server(Oid tablespaceid,
							Oid dbid,
							Oid relid,
							bool istemp,
							int64 *plast,
							int64 *pcached,
							int64 *pincrement,
							bool *poverflow)
{
	RelationData pseudo_rel;
	Relation	seqrel = &pseudo_rel;
	SeqTable	elm;

	*plast = 0;
	*pcached = 0;
	*pincrement = 0;

	/*
	 * In Postgres, init_sequence() looks up the SeqTable entry for the
	 * sequence.  The sequence server does not need that lookup as such; we
	 * only need `elm` initialized, because cdb_sequence_nextval() calls
	 * read_seq_tuple(), which requires it.
	 *
	 * In GPDB a sequence server generates the values for all sequences.  It
	 * does not have to lock the sequence relation, because a single
	 * sequence server instance handles every request coming from the
	 * segments.  To keep the master (e.g. `select nextval(seq)`) and the
	 * segments (e.g. an insert into a table with a serial column) from
	 * colliding, a BUFFER_LOCK_EXCLUSIVE lock is held on the sequence
	 * relation's shared buffer.
	 */
	init_sequence(relid, &elm, NULL);

	/* Build a pseudo relcache entry with just enough info to call bufmgr */
	cdb_sequence_relation_init(seqrel, tablespaceid, dbid, relid, istemp);

	/* CDB TODO: Catch errors. */

	/* Advance the sequence object and hand the results back */
	cdb_sequence_nextval(elm, seqrel, plast, pcached, pincrement, poverflow);

	/* Release the pseudo relcache entry */
	cdb_sequence_relation_term(seqrel);
}
2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079

/*
 * seq_mask
 *		Mask a sequence page before consistency checks are performed on it.
 *
 * The page's LSN and checksum are masked, followed by its unused space.
 * "blkno" is not used by this routine.
 */
void
seq_mask(char *page, BlockNumber blkno)
{
	/* LSN and checksum first ... */
	mask_page_lsn_and_checksum(page);

	/* ... then the unused space on the page */
	mask_unused_space(page);
}