提交 20267fda 编写于 作者: A Abhijit Subramanya 提交者: Asim R P

Add XLogIsNeeded() macro.

The macro is taken from the upstream commit
40f908bd.

This commit fixes issues for CLUSTER and COPY command where the commands would
not generate necessary XLOG records when streaming replication is enabled. With
the correct use of XLogIsNeeded() this is now fixed.

This also cleans up the XLog_CanBypassWal() and XLog_UnconvertedCanBypassWal()
functions by replacing their usage with XLogIsNeeded().
Signed-off-by: Taylor Vesely <tvesely@pivotal.io>
Signed-off-by: Asim R P <apraveen@pivotal.io>
上级 607dcb75
......@@ -66,7 +66,6 @@ bmbuild(PG_FUNCTION_ARGS)
BMBuildState bmstate;
IndexBuildResult *result;
TupleDesc tupDesc;
bool useWal;
MIRROREDLOCK_BUFMGR_VERIFY_NO_LOCK_LEAK_ENTER;
......@@ -85,10 +84,8 @@ bmbuild(PG_FUNCTION_ARGS)
tupDesc = RelationGetDescr(index);
useWal = (!XLog_UnconvertedCanBypassWal() && !index->rd_istemp);
/* initialize the bitmap index. */
_bitmap_init(index, useWal);
_bitmap_init(index, !index->rd_istemp);
/* initialize the build state. */
_bitmap_init_buildstate(index, &bmstate);
......@@ -98,23 +95,6 @@ bmbuild(PG_FUNCTION_ARGS)
bmbuildCallback, (void *)&bmstate);
/* clean up the build state */
_bitmap_cleanup_buildstate(index, &bmstate);
/*
* fsync the relevant files to disk, unless we're building
* a temporary index
*/
if (!useWal)
{
FlushRelationBuffers(bmstate.bm_lov_heap);
smgrimmedsync(bmstate.bm_lov_heap->rd_smgr);
FlushRelationBuffers(bmstate.bm_lov_index);
smgrimmedsync(bmstate.bm_lov_index->rd_smgr);
FlushRelationBuffers(index);
/* FlushRelationBuffers will have opened rd_smgr */
smgrimmedsync(index->rd_smgr);
}
/* return statistics */
result = (IndexBuildResult *) palloc0(sizeof(IndexBuildResult));
......
......@@ -117,7 +117,8 @@ _bitmap_create_lov_heapandindex(Relation rel,
_bt_initmetapage(btree_metapage, P_NONE, 0);
/* XLOG the metapage */
if (!XLog_UnconvertedCanBypassWal() && !lovIndex->rd_istemp)
if (!lovIndex->rd_istemp)
{
// Fetch gp_persistent_relation_node information that will be added to XLOG record.
RelationFetchGpRelationNodeForXLog(lovIndex);
......
......@@ -306,7 +306,7 @@ _bitmap_init_buildstate(Relation index, BMBuildState *bmstate)
* writes page to the shared buffer, we can't disable WAL archiving.
* We will add this shortly.
*/
bmstate->use_wal = !XLog_UnconvertedCanBypassWal() && !index->rd_istemp;
bmstate->use_wal = !index->rd_istemp;
}
/*
......
......@@ -209,10 +209,10 @@ _bt_leafbuild(BTSpool *btspool, BTSpool *btspool2)
wstate.index = btspool->index;
/*
* We need to log index creation in WAL iff WAL archiving is enabled AND
* it's not a temp index.
* We need to log index creation in WAL iff WAL archiving/streaming is
* enabled AND it's not a temp index.
*/
wstate.btws_use_wal = !XLog_UnconvertedCanBypassWal() && !wstate.index->rd_istemp;
wstate.btws_use_wal = XLogIsNeeded() && !wstate.index->rd_istemp;
/* reserve the metapage */
wstate.btws_pages_alloced = BTREE_METAPAGE + 1;
......
......@@ -720,40 +720,6 @@ static bool CheckForStandbyTrigger(void);
static void GetXLogCleanUpTo(XLogRecPtr recptr, uint32 *_logId, uint32 *_logSeg);
static void checkXLogConsistency(XLogRecord *record, XLogRecPtr EndRecPtr);
/*
* Whether we need to always generate transaction log (XLOG), or if we can
* bypass it and get better performance.
*
* For GPDB, we currently do not support XLogArchivingActive(), so we don't
* use it as a condition.
*/
bool
XLog_CanBypassWal(void)
{
	bool		canBypass;

#ifdef USE_SEGWALREP
	/*
	 * Segments are built with WAL replication, so nothing may be skipped
	 * from the WAL.
	 */
	canBypass = false;
#else
	/*
	 * The standby master does not use FileRep yet and therefore needs the
	 * XLOG transmitted to it, so the master never bypasses WAL.  The master
	 * could skip some WAL operations when no standby is configured, but for
	 * now we lean towards safety.  Every other content id may bypass.
	 */
	canBypass = (GpIdentity.segindex != MASTER_CONTENT_ID);
#endif

	return canBypass;
}
/*
* For FileRep code that doesn't have the Bypass WAL logic yet.
*/
bool
XLog_UnconvertedCanBypassWal(void)
{
	/* Callers without Bypass WAL logic must never skip WAL. */
	return false;
}
static char *XLogContiguousCopy(
XLogRecord *record,
......@@ -3071,15 +3037,16 @@ XLogFileClose(void)
/*
* WAL segment files will not be re-read in normal operation, so we advise
* OS to release any cached pages. But do not do so if WAL archiving is
* active, because archiver process could use the cache to read the WAL
* segment.
* OS to release any cached pages. But do not do so if WAL archiving or
* streaming is active, because archiver process could use the cache to
* read the WAL segment. Also, don't bother with it if we are using
* O_DIRECT, since the kernel is presumably not caching in that case.
*
* While O_DIRECT works for O_SYNC, posix_fadvise() works for fsync() and
* O_SYNC, and some platforms only have posix_fadvise().
*/
#if defined(HAVE_DECL_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
if (!XLogArchivingActive())
if (!XLogIsNeeded())
posix_fadvise(openLogFile, 0, 0, POSIX_FADV_DONTNEED);
#endif
#endif /* NOT_USED */
......
......@@ -849,10 +849,10 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex)
LockRelationOid(OldHeap->rd_rel->reltoastrelid, AccessExclusiveLock);
/*
* We need to log the copied data in WAL iff WAL archiving is enabled AND
* it's not a temp rel.
* We need to log the copied data in WAL iff WAL archiving/streaming is
* enabled AND it's not a temp rel.
*/
use_wal = XLogArchivingActive() && !NewHeap->rd_istemp;
use_wal = XLogIsNeeded() && !NewHeap->rd_istemp;
/* use_wal off requires rd_targblock be initially invalid */
Assert(NewHeap->rd_targblock == InvalidBlockNumber);
......
......@@ -4275,7 +4275,7 @@ CopyFrom(CopyState cstate)
/*----------
* Check to see if we can avoid writing WAL
*
* If archive logging is not enabled *and* either
* If archive logging/streaming is not enabled *and* either
* - table was created in same transaction as this COPY
* - data is being written to relfilenode created in this transaction
* then we can skip writing WAL. It's safe because if the transaction
......@@ -4303,8 +4303,7 @@ CopyFrom(CopyState cstate)
cstate->rel->rd_newRelfilenodeSubid != InvalidSubTransactionId)
{
use_fsm = false;
if (!XLogArchivingActive())
use_wal = false;
use_wal = XLogIsNeeded();
}
oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
......
......@@ -1170,7 +1170,7 @@ createdb(CreatedbStmt *stmt)
PersistentFileSysRelStorageMgr localRelStorageMgr;
PersistentFileSysRelBufpoolKind relBufpoolKind;
useWal = !XLog_CanBypassWal();
useWal = XLogIsNeeded();
GpPersistentRelationNode_GetRelationInfo(
dbInfoRel->relkind,
......
......@@ -10059,7 +10059,7 @@ ATExecSetTableSpace_BufferPool(
* We need to log the copied data in WAL iff WAL archiving/streaming is
* enabled AND it's not a temp rel.
*/
useWal = !XLog_CanBypassWal() && !rel->rd_istemp;
useWal = XLogIsNeeded() && !rel->rd_istemp;
if (Debug_persistent_print)
elog(Persistent_DebugPrintLevel(),
......
......@@ -4789,7 +4789,7 @@ OpenIntoRel(QueryDesc *queryDesc)
*/
bufferPoolBulkLoad =
(relstorage_is_buffer_pool(relstorage) ?
XLog_CanBypassWal() : false);
!XLogIsNeeded() : false);
/* Now we can actually create the new relation */
intoRelationId = heap_create_with_catalog(intoName,
......
......@@ -21,6 +21,7 @@
#include "utils/relcache.h"
#include "utils/timestamp.h"
#include "cdb/cdbpublic.h"
#include "replication/walsender.h"
/*
* REDO Tracking DEFINEs.
......@@ -187,18 +188,12 @@ extern bool log_checkpoints;
#define XLogArchivingActive() (XLogArchiveMode)
#define XLogArchiveCommandSet() (XLogArchiveCommand[0] != '\0')
/*
* Whether we need to always generate transaction log (XLOG), or if we can
* bypass it and get better performance.
*
* For GPDB, we do not support XLogArchivingActive(), so we don't use it as a condition.
*/
extern bool XLog_CanBypassWal(void);
/*
* For FileRep code that doesn't have the Bypass WAL logic yet.
* Is WAL-logging necessary? We need to log an XLOG record iff either
* WAL archiving is enabled or XLOG streaming is allowed.
*/
extern bool XLog_UnconvertedCanBypassWal(void);
#define XLogIsNeeded() (XLogArchivingActive() || (max_wal_senders > 0))
extern bool am_startup;
......
......@@ -1099,3 +1099,15 @@ SET gp_enable_segment_copy_checking=off;
COPY COPY_FROM_PROGRAM_ERROR FROM PROGRAM $quote$bash -c "echo -e '$GP_SEGMENT_ID\n<SEGID>'"$quote$ on segment;
COPY COPY_FROM_PROGRAM_ERROR FROM PROGRAM $quote$bash -c "echo -e '$GP_SEGMENT_I\n<SEGID>'"$quote$ on segment;
SET gp_enable_segment_copy_checking=on;
-- Test that xlog records are generated for COPY in the same transaction as
-- created table. The actual validation for this test will be performed once the
-- gp_replica_check tool is enabled.
BEGIN;
CREATE TABLE copy_from_same_txn(a int, b int);
COPY copy_from_same_txn FROM stdin;
1 1
2 2
3 3
\.
COMMIT;
......@@ -1298,3 +1298,12 @@ COPY COPY_FROM_PROGRAM_ERROR FROM PROGRAM $quote$bash -c "echo -e '$GP_SEGMENT_I
ERROR: invalid input syntax for integer: "" (seg0 172.17.0.2:40000 pid=27935)
CONTEXT: COPY copy_from_program_error, line 1, column
SET gp_enable_segment_copy_checking=on;
-- Test that xlog records are generated for COPY in the same transaction as
-- created table. The actual validation for this test will be performed once the
-- gp_replica_check tool is enabled.
BEGIN;
CREATE TABLE copy_from_same_txn(a int, b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
COPY copy_from_same_txn FROM stdin;
COMMIT;
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册