diff --git a/src/backend/cdb/cdbvars.c b/src/backend/cdb/cdbvars.c index 73c3855a714a81d94bf01b99752d916b7a54354d..04d973b84dcb3101e145ac76e10afbeb45aa2b36 100644 --- a/src/backend/cdb/cdbvars.c +++ b/src/backend/cdb/cdbvars.c @@ -134,17 +134,11 @@ int gp_fts_probe_timeout = 20; int gp_fts_probe_interval = 60; /* - * Number of threads to use for probe of segments (it is a good idea to have this - * larger than the number of segments per host. + * If the mirror disconnects and re-connects within this period (in seconds), + * or takes up to this long to make its initial connection at cluster start, + * it will not be reported as down to FTS. */ -int gp_fts_probe_threadcount = 16; - -/* The number of retries to request a segment state transition. */ -int gp_fts_transition_retries = 5; - -/* Timeout to request a segment state transition. */ -int gp_fts_transition_timeout = 3600; - +int gp_fts_mark_mirror_down_grace_period = 30; /* * When we have certain types of failures during gang creation which indicate diff --git a/src/backend/replication/gp_replication.c b/src/backend/replication/gp_replication.c index 924b13770320a48df4a4c97692c7f34daba3828b..5067f6c58cf17cf60196a031a19855cbeb577a4c 100644 --- a/src/backend/replication/gp_replication.c +++ b/src/backend/replication/gp_replication.c @@ -13,6 +13,7 @@ #include "postgres.h" #include "pgtime.h" +#include "cdb/cdbvars.h" #include "replication/gp_replication.h" #include "replication/walreceiver.h" #include "replication/walsender_private.h" @@ -21,13 +22,6 @@ /* Set at database system is ready to accept connections */ extern pg_time_t PMAcceptingConnectionsStartTime; -/* - * If mirror disconnects and re-connects between this period, or just takes - * this much time during initial connection of cluster start, it will not get - * reported as down to FTS.
- */ -#define FTS_MARKING_MIRROR_DOWN_GRACE_PERIOD 30 /* secs */ - /* * Check the WalSndCtl to obtain if mirror is up or down, if the wal sender is * in streaming, and if synchronous replication is enabled or not. @@ -71,11 +65,11 @@ GetMirrorStatus(FtsResponse *response) * glitch. During this period, request FTS to probe again. * * If the delta is negative, then it's overflowed, meaning it's - * over FTS_MARKING_MIRROR_DOWN_GRACE_PERIOD since either last + * over gp_fts_mark_mirror_down_grace_period since either last * database accepting connections or last time wal sender * died. Then, we can safely mark the mirror is down. */ - if (delta < FTS_MARKING_MIRROR_DOWN_GRACE_PERIOD && delta >= 0) + if (delta < gp_fts_mark_mirror_down_grace_period && delta >= 0) { ereport(LOG, (errmsg("requesting fts retry as mirror didn't connect yet but in grace period: " INT64_FORMAT, delta), diff --git a/src/backend/replication/test/gp_replication_test.c b/src/backend/replication/test/gp_replication_test.c index 6ff5d8d5aec00d3a737f339c5540163782495c81..dc66e4ad8f00bd140937dd48c54fd6fc1dcb35fd 100644 --- a/src/backend/replication/test/gp_replication_test.c +++ b/src/backend/replication/test/gp_replication_test.c @@ -79,7 +79,7 @@ test_GetMirrorStatus_Pid_Zero(void **state) * duration is taken into account. */ data.walsnds[0].marked_pid_zero_at_time = - ((pg_time_t) time(NULL)) - FTS_MARKING_MIRROR_DOWN_GRACE_PERIOD; + ((pg_time_t) time(NULL)) - gp_fts_mark_mirror_down_grace_period; /* * Ensure the recovery finished before wal sender died. @@ -107,12 +107,12 @@ test_GetMirrorStatus_RequestRetry(void **state) * Make the pid zero time within the grace period. */ data.walsnds[0].marked_pid_zero_at_time = - ((pg_time_t) time(NULL)) - FTS_MARKING_MIRROR_DOWN_GRACE_PERIOD/2; + ((pg_time_t) time(NULL)) - gp_fts_mark_mirror_down_grace_period/2; /* * Ensure recovery finished before wal sender died. 
*/ - PMAcceptingConnectionsStartTime = data.walsnds[0].marked_pid_zero_at_time - FTS_MARKING_MIRROR_DOWN_GRACE_PERIOD; + PMAcceptingConnectionsStartTime = data.walsnds[0].marked_pid_zero_at_time - gp_fts_mark_mirror_down_grace_period; expect_lwlock(LW_SHARED); expect_ereport(); @@ -138,14 +138,14 @@ test_GetMirrorStatus_Delayed_AcceptingConnectionsStartTime(void **state) * Mirror will be marked down, and no retry. */ data.walsnds[0].marked_pid_zero_at_time = - ((pg_time_t) time(NULL)) - FTS_MARKING_MIRROR_DOWN_GRACE_PERIOD; + ((pg_time_t) time(NULL)) - gp_fts_mark_mirror_down_grace_period; /* * However the database was in recovery, hence * we are still within the grace period, and * we should retry. */ - PMAcceptingConnectionsStartTime = ((pg_time_t) time(NULL)) - FTS_MARKING_MIRROR_DOWN_GRACE_PERIOD/2; + PMAcceptingConnectionsStartTime = ((pg_time_t) time(NULL)) - gp_fts_mark_mirror_down_grace_period/2; expect_lwlock(LW_SHARED); expect_ereport(); diff --git a/src/backend/utils/misc/guc_gp.c b/src/backend/utils/misc/guc_gp.c index f5cb7084eb1926cc034bfd58af87422dbfe57ea4..2af5d1fa6008da72a4f9ea91ab3b77101eff1b3f 100644 --- a/src/backend/utils/misc/guc_gp.c +++ b/src/backend/utils/misc/guc_gp.c @@ -3502,6 +3502,16 @@ struct config_int ConfigureNamesInt_gp[] = 60, 10, 3600, NULL, NULL }, + { + {"gp_fts_mark_mirror_down_grace_period", PGC_SIGHUP, GP_ARRAY_TUNING, + gettext_noop("Time (in seconds) allowed to mirror after disconnection, to reconnect before being marked as down in configuration by FTS."), + gettext_noop("Used by the fts-probe process."), + GUC_UNIT_S + }, + &gp_fts_mark_mirror_down_grace_period, + 30, 0, 3600, NULL, NULL + }, + { {"gp_gang_creation_retry_count", PGC_USERSET, GP_ARRAY_TUNING, gettext_noop("After a gang-creation fails, retry the number of times if failure is retryable."), diff --git a/src/include/cdb/cdbvars.h b/src/include/cdb/cdbvars.h index e973d7249118f1b2531ebce26c1639986a097fb1..1543b18f156c138a3e0f898b0e7c3c2c9da517cd 100644 --- 
a/src/include/cdb/cdbvars.h +++ b/src/include/cdb/cdbvars.h @@ -346,7 +346,7 @@ extern int gp_snapshotadd_timeout; /* GUC var - timeout specifier for snapshot-c extern int gp_fts_probe_retries; /* GUC var - specifies probe number of retries for FTS */ extern int gp_fts_probe_timeout; /* GUC var - specifies probe timeout for FTS */ extern int gp_fts_probe_interval; /* GUC var - specifies polling interval for FTS */ -extern int gp_fts_probe_threadcount; /* GUC var - specifies number of threads to use for FTS probes */ +extern int gp_fts_mark_mirror_down_grace_period; /* GUC var - grace period (in seconds) before a disconnected mirror is reported as down to FTS */ extern int gp_gang_creation_retry_count; /* How many retries ? */ extern int gp_gang_creation_retry_timer; /* How long between retries */