未验证 提交 db60b003 编写于 作者: ( (Jerome)Junfeng Yang 提交者: GitHub

Fix flaky test for replication_keeps_crash. (#10423)

Remove the `set gp_fts_probe_retries to 1` step, which may cause the FTS probe to fail.
It was originally added to reduce the test time, but setting a lower retry
value may cause the test to fail to probe FTS and update the segment
configuration. Since reducing `gp_fts_replication_attempt_count` also
saves test time, skip altering `gp_fts_probe_retries`.

Also found an assertion that may fail when the mirror is marked down before
the walsender exits: the replication status is freed before the walsender
exits and tries to record the disconnect info, which causes the segment to
crash and start recovery.
上级 81810a20
......@@ -291,10 +291,13 @@ FTSReplicationStatusMarkDisconnectForReplication(const char *app_name)
LWLockAcquire(FTSReplicationStatusLock, LW_SHARED);
replication_status = RetrieveFTSReplicationStatus(app_name, false /* skip_warn */);
/*
* FTS may have already marked the mirror down and freed the replication
* status. In that case, a NULL pointer is returned.
*/
replication_status = RetrieveFTSReplicationStatus(app_name, true /* skip_warn */);
/* replication_status must exist */
Assert(replication_status);
/* if replication_status is NULL, do nothing */
FTSReplicationStatusMarkDisconnect(replication_status);
LWLockRelease(FTSReplicationStatusLock);
......
......@@ -15,8 +15,6 @@ CREATE
-- modify fts gucs to speed up the test.
1: alter system set gp_fts_probe_interval to 10;
ALTER
1: alter system set gp_fts_probe_retries to 1;
ALTER
1: alter system set gp_fts_replication_attempt_count to 3;
ALTER
1: select pg_reload_conf();
......@@ -49,6 +47,12 @@ select gp_inject_fault_infinite('wal_sender_loop', 'error', dbid) from gp_segmen
-- LSN to be flushed on mirror.
1&: create table mirror_block_t1 (a int) distributed by (a); <waiting ...>
select gp_wait_until_triggered_fault('wal_sender_loop', 1, dbid) from gp_segment_configuration where role='p' and content=0;
gp_wait_until_triggered_fault
-------------------------------
Success:
(1 row)
-- trigger fts to mark mirror down.
select gp_request_fts_probe_scan();
gp_request_fts_probe_scan
......@@ -108,8 +112,6 @@ DROP
1: alter system reset gp_fts_probe_interval;
ALTER
1: alter system reset gp_fts_probe_retries;
ALTER
1: alter system reset gp_fts_replication_attempt_count;
ALTER
1: select pg_reload_conf();
......
......@@ -13,7 +13,6 @@ include: helpers/server_helpers.sql;
-- modify fts gucs to speed up the test.
1: alter system set gp_fts_probe_interval to 10;
1: alter system set gp_fts_probe_retries to 1;
1: alter system set gp_fts_replication_attempt_count to 3;
1: select pg_reload_conf();
......@@ -27,6 +26,8 @@ select gp_inject_fault_infinite('wal_sender_loop', 'error', dbid)
-- LSN to be flushed on mirror.
1&: create table mirror_block_t1 (a int) distributed by (a);
select gp_wait_until_triggered_fault('wal_sender_loop', 1, dbid) from gp_segment_configuration where role='p' and content=0;
-- trigger fts to mark mirror down.
select gp_request_fts_probe_scan();
......@@ -55,6 +56,5 @@ SELECT role, preferred_role, content, mode, status FROM gp_segment_configuration
drop table mirror_block_t1;
1: alter system reset gp_fts_probe_interval;
1: alter system reset gp_fts_probe_retries;
1: alter system reset gp_fts_replication_attempt_count;
1: select pg_reload_conf();
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册