提交 85fee736 编写于 作者: A Ashwin Agrawal

Pass relstorage type to smgr layer.

Without this patch the storage layout is not known in the md and smgr layers.
Due to the lack of this info, sub-optimal operations need to be performed
generically for all table types. For example, heap-specific functions like
ForgetRelationFsyncRequests() and DropRelFileNodeBuffers() get called even for
AO and CO tables.

Add a new RelFileNodeWithStorageType struct to pass the storage type to the md
and smgr layers. XLOG_XACT_COMMIT and XLOG_XACT_ABORT WAL records use the new
structure, which holds a RelFileNode and the storage type.
Co-authored-by: David Kimura <dkimura@pivotal.io>
上级 3f302551
......@@ -173,7 +173,7 @@ static void RecordTransactionCommitPrepared(TransactionId xid,
int nchildren,
TransactionId *children,
int nrels,
RelFileNode *rels,
RelFileNodeWithStorageType *rels,
int ninvalmsgs,
SharedInvalidationMessage *invalmsgs,
bool initfileinval);
......@@ -181,7 +181,7 @@ static void RecordTransactionAbortPrepared(TransactionId xid,
int nchildren,
TransactionId *children,
int nrels,
RelFileNode *rels);
RelFileNodeWithStorageType *rels);
static void ProcessRecords(char *bufptr, TransactionId xid,
const TwoPhaseCallback callbacks[]);
static void RemoveGXact(GlobalTransaction gxact);
......@@ -952,8 +952,8 @@ TwoPhaseGetDummyProc(TransactionId xid)
*
* 1. TwoPhaseFileHeader
* 2. TransactionId[] (subtransactions)
* 3. RelFileNode[] (files to be deleted at commit)
* 4. RelFileNode[] (files to be deleted at abort)
* 3. RelFileNodeWithStorageType[] (files to be deleted at commit)
* 4. RelFileNodeWithStorageType[] (files to be deleted at abort)
* 5. SharedInvalidationMessage[] (inval messages to be sent at commit)
* 6. TwoPhaseRecordOnDisk
* 7. ...
......@@ -1054,8 +1054,8 @@ StartPrepare(GlobalTransaction gxact)
TransactionId xid = gxact->proc.xid;
TwoPhaseFileHeader hdr;
TransactionId *children;
RelFileNode *commitrels;
RelFileNode *abortrels;
RelFileNodeWithStorageType *commitrels;
RelFileNodeWithStorageType *abortrels;
SharedInvalidationMessage *invalmsgs;
/* Initialize linked list */
......@@ -1099,12 +1099,12 @@ StartPrepare(GlobalTransaction gxact)
}
if (hdr.ncommitrels > 0)
{
save_state_data(commitrels, hdr.ncommitrels * sizeof(RelFileNode));
save_state_data(commitrels, hdr.ncommitrels * sizeof(RelFileNodeWithStorageType));
pfree(commitrels);
}
if (hdr.nabortrels > 0)
{
save_state_data(abortrels, hdr.nabortrels * sizeof(RelFileNode));
save_state_data(abortrels, hdr.nabortrels * sizeof(RelFileNodeWithStorageType));
pfree(abortrels);
}
if (hdr.ninvalmsgs > 0)
......@@ -1312,9 +1312,9 @@ FinishPreparedTransaction(const char *gid, bool isCommit, bool raiseErrorIfNotFo
TwoPhaseFileHeader *hdr;
TransactionId latestXid;
TransactionId *children;
RelFileNode *commitrels;
RelFileNode *abortrels;
RelFileNode *delrels;
RelFileNodeWithStorageType *commitrels;
RelFileNodeWithStorageType *abortrels;
RelFileNodeWithStorageType *delrels;
int ndelrels;
SharedInvalidationMessage *invalmsgs;
int i;
......@@ -1388,10 +1388,10 @@ FinishPreparedTransaction(const char *gid, bool isCommit, bool raiseErrorIfNotFo
bufptr = buf + MAXALIGN(sizeof(TwoPhaseFileHeader));
children = (TransactionId *) bufptr;
bufptr += MAXALIGN(hdr->nsubxacts * sizeof(TransactionId));
commitrels = (RelFileNode *) bufptr;
bufptr += MAXALIGN(hdr->ncommitrels * sizeof(RelFileNode));
abortrels = (RelFileNode *) bufptr;
bufptr += MAXALIGN(hdr->nabortrels * sizeof(RelFileNode));
commitrels = (RelFileNodeWithStorageType *) bufptr;
bufptr += MAXALIGN(hdr->ncommitrels * sizeof(RelFileNodeWithStorageType));
abortrels = (RelFileNodeWithStorageType *) bufptr;
bufptr += MAXALIGN(hdr->nabortrels * sizeof(RelFileNodeWithStorageType));
invalmsgs = (SharedInvalidationMessage *) bufptr;
bufptr += MAXALIGN(hdr->ninvalmsgs * sizeof(SharedInvalidationMessage));
......@@ -1448,12 +1448,12 @@ FinishPreparedTransaction(const char *gid, bool isCommit, bool raiseErrorIfNotFo
}
for (i = 0; i < ndelrels; i++)
{
SMgrRelation srel = smgropen(delrels[i], InvalidBackendId);
SMgrRelation srel = smgropen(delrels[i].node, InvalidBackendId);
ForkNumber fork;
for (fork = 0; fork <= MAX_FORKNUM; fork++)
{
smgrdounlink(srel, fork, false);
smgrdounlink(srel, fork, false, delrels[i].relstorage);
}
smgrclose(srel);
}
......@@ -1793,8 +1793,8 @@ RecoverPreparedTransactions(void)
bufptr = buf + MAXALIGN(sizeof(TwoPhaseFileHeader));
subxids = (TransactionId *) bufptr;
bufptr += MAXALIGN(hdr->nsubxacts * sizeof(TransactionId));
bufptr += MAXALIGN(hdr->ncommitrels * sizeof(RelFileNode));
bufptr += MAXALIGN(hdr->nabortrels * sizeof(RelFileNode));
bufptr += MAXALIGN(hdr->ncommitrels * sizeof(RelFileNodeWithStorageType));
bufptr += MAXALIGN(hdr->nabortrels * sizeof(RelFileNodeWithStorageType));
bufptr += MAXALIGN(hdr->ninvalmsgs * sizeof(SharedInvalidationMessage));
/*
......@@ -1875,7 +1875,7 @@ RecordTransactionCommitPrepared(TransactionId xid,
int nchildren,
TransactionId *children,
int nrels,
RelFileNode *rels,
RelFileNodeWithStorageType *rels,
int ninvalmsgs,
SharedInvalidationMessage *invalmsgs,
bool initfileinval)
......@@ -1916,7 +1916,7 @@ RecordTransactionCommitPrepared(TransactionId xid,
{
rdata[0].next = &(rdata[1]);
rdata[1].data = (char *) rels;
rdata[1].len = nrels * sizeof(RelFileNode);
rdata[1].len = nrels * sizeof(RelFileNodeWithStorageType);
rdata[1].buffer = InvalidBuffer;
lastrdata = 1;
}
......@@ -1999,7 +1999,7 @@ RecordTransactionAbortPrepared(TransactionId xid,
int nchildren,
TransactionId *children,
int nrels,
RelFileNode *rels)
RelFileNodeWithStorageType *rels)
{
XLogRecData rdata[3];
int lastrdata = 0;
......@@ -2029,7 +2029,7 @@ RecordTransactionAbortPrepared(TransactionId xid,
{
rdata[0].next = &(rdata[1]);
rdata[1].data = (char *) rels;
rdata[1].len = nrels * sizeof(RelFileNode);
rdata[1].len = nrels * sizeof(RelFileNodeWithStorageType);
rdata[1].buffer = InvalidBuffer;
lastrdata = 1;
}
......
......@@ -1159,7 +1159,7 @@ RecordTransactionCommit(void)
bool markXidCommitted;
TransactionId latestXid = InvalidTransactionId;
int nrels;
RelFileNode *rels;
RelFileNodeWithStorageType *rels;
int nchildren;
TransactionId *children;
int nmsgs = 0;
......@@ -1295,7 +1295,7 @@ RecordTransactionCommit(void)
{
rdata[0].next = &(rdata[1]);
rdata[1].data = (char *) rels;
rdata[1].len = nrels * sizeof(RelFileNode);
rdata[1].len = nrels * sizeof(RelFileNodeWithStorageType);
rdata[1].buffer = InvalidBuffer;
lastrdata = 1;
}
......@@ -1701,7 +1701,7 @@ RecordTransactionAbort(bool isSubXact)
TransactionId xid;
TransactionId latestXid;
int nrels;
RelFileNode *rels;
RelFileNodeWithStorageType *rels;
int nchildren;
TransactionId *children;
XLogRecData rdata[3];
......@@ -1778,7 +1778,7 @@ RecordTransactionAbort(bool isSubXact)
{
rdata[0].next = &(rdata[1]);
rdata[1].data = (char *) rels;
rdata[1].len = nrels * sizeof(RelFileNode);
rdata[1].len = nrels * sizeof(RelFileNodeWithStorageType);
rdata[1].buffer = InvalidBuffer;
lastrdata = 1;
}
......@@ -5766,13 +5766,13 @@ xact_redo_commit(xl_xact_commit *xlrec, TransactionId xid, XLogRecPtr lsn,
{
for (i = 0; i < xlrec->nrels; i++)
{
SMgrRelation srel = smgropen(xlrec->xnodes[i], InvalidBackendId);
SMgrRelation srel = smgropen(xlrec->xnodes[i].node, InvalidBackendId);
ForkNumber fork;
for (fork = 0; fork <= MAX_FORKNUM; fork++)
{
XLogDropRelation(xlrec->xnodes[i], fork);
smgrdounlink(srel, fork, true);
XLogDropRelation(xlrec->xnodes[i].node, fork);
smgrdounlink(srel, fork, true, xlrec->xnodes[i].relstorage);
}
smgrclose(srel);
}
......@@ -5884,13 +5884,13 @@ xact_redo_distributed_commit(xl_xact_commit *xlrec, TransactionId xid)
for (i = 0; i < xlrec->nrels; i++)
{
SMgrRelation srel = smgropen(xlrec->xnodes[i], InvalidBackendId);
SMgrRelation srel = smgropen(xlrec->xnodes[i].node, InvalidBackendId);
ForkNumber fork;
for (fork = 0; fork <= MAX_FORKNUM; fork++)
{
XLogDropRelation(xlrec->xnodes[i], fork);
smgrdounlink(srel, fork, true);
XLogDropRelation(xlrec->xnodes[i].node, fork);
smgrdounlink(srel, fork, true, xlrec->xnodes[i].relstorage);
}
smgrclose(srel);
}
......@@ -5968,13 +5968,13 @@ xact_redo_abort(xl_xact_abort *xlrec, TransactionId xid)
/* Make sure files supposed to be dropped are dropped */
for (i = 0; i < xlrec->nrels; i++)
{
SMgrRelation srel = smgropen(xlrec->xnodes[i], InvalidBackendId);
SMgrRelation srel = smgropen(xlrec->xnodes[i].node, InvalidBackendId);
ForkNumber fork;
for (fork = 0; fork <= MAX_FORKNUM; fork++)
{
XLogDropRelation(xlrec->xnodes[i], fork);
smgrdounlink(srel, fork, true);
XLogDropRelation(xlrec->xnodes[i].node, fork);
smgrdounlink(srel, fork, true, xlrec->xnodes[i].relstorage);
}
smgrclose(srel);
}
......@@ -6068,7 +6068,7 @@ xact_desc_commit(StringInfo buf, xl_xact_commit *xlrec)
appendStringInfo(buf, "; rels:");
for (i = 0; i < xlrec->nrels; i++)
{
char *path = relpathperm(xlrec->xnodes[i], MAIN_FORKNUM);
char *path = relpathperm(xlrec->xnodes[i].node, MAIN_FORKNUM);
appendStringInfo(buf, " %s", path);
pfree(path);
......@@ -6145,7 +6145,7 @@ xact_desc_abort(StringInfo buf, xl_xact_abort *xlrec)
appendStringInfo(buf, "; rels:");
for (i = 0; i < xlrec->nrels; i++)
{
char *path = relpathperm(xlrec->xnodes[i], MAIN_FORKNUM);
char *path = relpathperm(xlrec->xnodes[i].node, MAIN_FORKNUM);
appendStringInfo(buf, " %s", path);
pfree(path);
......
......@@ -376,7 +376,7 @@ heap_create(const char *relname,
isAppendOnly = (relstorage == RELSTORAGE_AOROWS || relstorage == RELSTORAGE_AOCOLS);
RelationOpenSmgr(rel);
RelationCreateStorage(rel->rd_node, relpersistence);
RelationCreateStorage(rel->rd_node, relpersistence, relstorage);
/*
* AO tables don't use the buffer manager, better to not keep the
......
......@@ -51,7 +51,7 @@
typedef struct PendingRelDelete
{
RelFileNode relnode; /* relation that may need to be deleted */
RelFileNodeWithStorageType relnode; /* relation that may need to be deleted */
BackendId backend; /* InvalidBackendId if not a temp rel */
bool atCommit; /* T=delete at commit; F=delete at abort */
int nestLevel; /* xact nesting level of request */
......@@ -96,7 +96,7 @@ typedef struct xl_smgr_truncate
* transaction aborts later on, the storage will be destroyed.
*/
void
RelationCreateStorage(RelFileNode rnode, char relpersistence)
RelationCreateStorage(RelFileNode rnode, char relpersistence, char relstorage)
{
PendingRelDelete *pending;
SMgrRelation srel;
......@@ -131,7 +131,8 @@ RelationCreateStorage(RelFileNode rnode, char relpersistence)
/* Add the relation to the list of stuff to delete at abort */
pending = (PendingRelDelete *)
MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete));
pending->relnode = rnode;
pending->relnode.node = rnode;
pending->relnode.relstorage = relstorage;
pending->backend = backend;
pending->atCommit = false; /* delete if abort */
pending->nestLevel = GetCurrentTransactionNestLevel();
......@@ -174,7 +175,8 @@ RelationDropStorage(Relation rel)
/* Add the relation to the list of stuff to delete at commit */
pending = (PendingRelDelete *)
MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete));
pending->relnode = rel->rd_node;
pending->relnode.node = rel->rd_node;
pending->relnode.relstorage = rel->rd_rel->relstorage;
pending->backend = rel->rd_backend;
pending->atCommit = true; /* delete if commit */
pending->nestLevel = GetCurrentTransactionNestLevel();
......@@ -219,7 +221,7 @@ RelationPreserveStorage(RelFileNode rnode)
for (pending = pendingDeletes; pending != NULL; pending = next)
{
next = pending->next;
if (RelFileNodeEquals(rnode, pending->relnode))
if (RelFileNodeEquals(rnode, pending->relnode.node))
{
/* we should only find delete-on-abort entries, else trouble */
if (pending->atCommit)
......@@ -357,10 +359,10 @@ smgrDoPendingDeletes(bool isCommit)
SMgrRelation srel;
int i;
srel = smgropen(pending->relnode, pending->backend);
srel = smgropen(pending->relnode.node, pending->backend);
for (i = 0; i <= MAX_FORKNUM; i++)
{
smgrdounlink(srel, i, false);
smgrdounlink(srel, i, false, pending->relnode.relstorage);
}
smgrclose(srel);
}
......@@ -396,11 +398,11 @@ smgrDoPendingDeletes(bool isCommit)
* dropped at the end of COMMIT phase.
*/
int
smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr)
smgrGetPendingDeletes(bool forCommit, RelFileNodeWithStorageType **ptr)
{
int nestLevel = GetCurrentTransactionNestLevel();
int nrels;
RelFileNode *rptr;
RelFileNodeWithStorageType *rptr;
PendingRelDelete *pending;
nrels = 0;
......@@ -420,7 +422,7 @@ smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr)
*ptr = NULL;
return 0;
}
rptr = (RelFileNode *) palloc(nrels * sizeof(RelFileNode));
rptr = (RelFileNodeWithStorageType *) palloc(nrels * sizeof(RelFileNodeWithStorageType));
*ptr = rptr;
for (pending = pendingDeletes; pending != NULL; pending = pending->next)
{
......
......@@ -11531,7 +11531,8 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode)
* NOTE: any conflict in relfilenode value will be caught in
* RelationCreateStorage().
*/
RelationCreateStorage(newrnode, rel->rd_rel->relpersistence);
RelationCreateStorage(newrnode, rel->rd_rel->relpersistence,
rel->rd_rel->relstorage);
if (RelationIsAppendOptimized(rel))
{
......
......@@ -728,7 +728,6 @@ ProcArrayApplyRecoveryInfo(RunningTransactions running)
TransactionIdAdvance(nextXid);
if (TransactionIdFollows(nextXid, ShmemVariableCache->nextXid))
ShmemVariableCache->nextXid = nextXid;
Assert(TransactionIdIsNormal(ShmemVariableCache->latestCompletedXid));
Assert(TransactionIdIsValid(ShmemVariableCache->nextXid));
......
......@@ -394,7 +394,7 @@ mdcreate_ao(RelFileNodeBackend rnode, int32 segmentFileNum, bool isRedo)
* we are usually not in a transaction anymore when this is called.
*/
void
mdunlink(RelFileNodeBackend rnode, ForkNumber forkNum, bool isRedo)
mdunlink(RelFileNodeBackend rnode, ForkNumber forkNum, bool isRedo, char relstorage)
{
char *path;
int ret;
......@@ -404,7 +404,8 @@ mdunlink(RelFileNodeBackend rnode, ForkNumber forkNum, bool isRedo)
* relation, else the next mdsync() will fail. There can't be any such
* requests for a temp relation, though.
*/
if (!RelFileNodeBackendIsTemp(rnode))
if (!RelFileNodeBackendIsTemp(rnode) &&
!relstorage_is_ao(relstorage))
ForgetRelationFsyncRequests(rnode.node, forkNum);
path = relpath(rnode, forkNum);
......
......@@ -293,8 +293,17 @@ smgrcreate_ao(RelFileNodeBackend rnode, int32 segmentFileNum, bool isRedo)
void
smgrdounlink(SMgrRelation reln, ForkNumber forknum, bool isRedo)
smgrdounlink(SMgrRelation reln, ForkNumber forknum, bool isRedo, char relstorage)
{
/*
* AO/CO tables have only MAIN_FORKNUM, so exit early for any other fork
* to avoid extra work.
*/
if (relstorage_is_ao(relstorage) &&
forknum != MAIN_FORKNUM)
return;
RelFileNodeBackend rnode = reln->smgr_rnode;
/* Close the fork */
......@@ -304,7 +313,9 @@ smgrdounlink(SMgrRelation reln, ForkNumber forknum, bool isRedo)
* Get rid of any remaining buffers for the relation. bufmgr will just
* drop them without bothering to write the contents.
*/
DropRelFileNodeBuffers(rnode, forknum, 0);
if ((relstorage != RELSTORAGE_AOROWS) &&
(relstorage != RELSTORAGE_AOCOLS))
DropRelFileNodeBuffers(rnode, forknum, 0);
/*
* It'd be nice to tell the stats collector to forget it immediately, too.
......@@ -330,7 +341,7 @@ smgrdounlink(SMgrRelation reln, ForkNumber forknum, bool isRedo)
* ERROR, because we've already decided to commit or abort the current
* xact.
*/
mdunlink(rnode, forknum, isRedo);
mdunlink(rnode, forknum, isRedo, relstorage);
}
/*
......
......@@ -2833,7 +2833,8 @@ RelationSetNewRelfilenode(Relation relation, TransactionId freezeXid)
newrnode.node = relation->rd_node;
newrnode.node.relNode = newrelfilenode;
newrnode.backend = relation->rd_backend;
RelationCreateStorage(newrnode.node, relation->rd_rel->relpersistence);
RelationCreateStorage(newrnode.node, relation->rd_rel->relpersistence,
relation->rd_rel->relstorage);
smgrclosenode(newrnode);
/*
......
......@@ -133,7 +133,7 @@ typedef struct xl_xact_commit
Oid dbId; /* MyDatabaseId */
Oid tsId; /* MyDatabaseTableSpace */
/* Array of RelFileNodeWithStorageType(s) to drop at commit */
RelFileNode xnodes[1]; /* VARIABLE LENGTH ARRAY */
RelFileNodeWithStorageType xnodes[1]; /* VARIABLE LENGTH ARRAY */
/* ARRAY OF COMMITTED SUBTRANSACTION XIDs FOLLOWS */
/* ARRAY OF SHARED INVALIDATION MESSAGES FOLLOWS */
/* DISTRIBUTED XACT STUFF FOLLOWS */
......@@ -163,7 +163,7 @@ typedef struct xl_xact_abort
int nrels; /* number of RelFileNodes */
int nsubxacts; /* number of subtransaction XIDs */
/* Array of RelFileNodeWithStorageType(s) to drop at abort */
RelFileNode xnodes[1]; /* VARIABLE LENGTH ARRAY */
RelFileNodeWithStorageType xnodes[1]; /* VARIABLE LENGTH ARRAY */
/* ARRAY OF ABORTED SUBTRANSACTION XIDs FOLLOWS */
} xl_xact_abort;
......
......@@ -20,7 +20,7 @@
#include "storage/relfilenode.h"
#include "utils/relcache.h"
extern void RelationCreateStorage(RelFileNode rnode, char relpersistence);
extern void RelationCreateStorage(RelFileNode rnode, char relpersistence, char relstorage);
extern void RelationDropStorage(Relation rel);
extern void RelationPreserveStorage(RelFileNode rnode);
extern void RelationTruncate(Relation rel, BlockNumber nblocks);
......@@ -30,7 +30,7 @@ extern void RelationTruncate(Relation rel, BlockNumber nblocks);
* naming
*/
extern void smgrDoPendingDeletes(bool isCommit);
extern int smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr);
extern int smgrGetPendingDeletes(bool forCommit, RelFileNodeWithStorageType **ptr);
extern void AtSubCommit_smgr(void);
extern void AtSubAbort_smgr(void);
extern void PostPrepare_smgr(void);
......
......@@ -124,4 +124,16 @@ inline static bool RelFileNode_IsEmpty(
relFileNode->relNode == 0);
}
/*
* RelFileNodeWithStorageType pairs a RelFileNode with the relation's storage
* type so that the smgr and md layers can make storage-specific decisions
* (for example, skipping buffer-manager and fsync-request work for
* append-optimized relations when unlinking).
*
* The storage type is purposefully kept out of RelFileNode itself for
* performance reasons: RelFileNode is used in a hot path for BufferTag
* hashing.
*
* Arrays of this struct are copied byte-for-byte into commit/abort WAL
* records and into the two-phase state data, sized as
* nrels * sizeof(RelFileNodeWithStorageType). Changing the fields or their
* order therefore changes those formats.
*/
typedef struct RelFileNodeWithStorageType
{
RelFileNode node;		/* physical relation identifier, as passed to smgropen() */
char relstorage;		/* storage type code, e.g. RELSTORAGE_AOROWS / RELSTORAGE_AOCOLS */
} RelFileNodeWithStorageType;
#endif /* RELFILENODE_H */
......@@ -86,7 +86,7 @@ extern void smgrclosenode(RelFileNodeBackend rnode);
extern void smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo);
extern void smgrcreate_ao(RelFileNodeBackend rnode, int32 segmentFileNum, bool isRedo);
extern void smgrdounlink(SMgrRelation reln, ForkNumber forknum,
bool isRedo);
bool isRedo, char relstorage);
extern void smgrextend(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, char *buffer, bool skipFsync);
extern void smgrprefetch(SMgrRelation reln, ForkNumber forknum,
......@@ -112,7 +112,7 @@ extern void mdclose(SMgrRelation reln, ForkNumber forknum);
extern void mdcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo);
extern void mdcreate_ao(RelFileNodeBackend rnode, int32 segmentFileNum, bool isRedo);
extern bool mdexists(SMgrRelation reln, ForkNumber forknum);
extern void mdunlink(RelFileNodeBackend rnode, ForkNumber forknum, bool isRedo);
extern void mdunlink(RelFileNodeBackend rnode, ForkNumber forknum, bool isRedo, char relstorage);
extern void mdextend(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, char *buffer, bool skipFsync);
extern void mdprefetch(SMgrRelation reln, ForkNumber forknum,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册