提交 6d829d98 编写于 作者: P Paul Guo

Retry more for replication synchronization waiting to avoid isolation2 test flakiness. (#10281)

Some test cases have been failing because they retried too few times. Let's increase the retry
counts and also create some common UDFs for reuse.
Reviewed-by: Hubert Zhang <hzhang@pivotal.io>
Reviewed-by: Ashwin Agrawal <aagrawal@pivotal.io>

Cherry-picked from ca360700
上级 8f753424
......@@ -195,8 +195,11 @@ select pg_ctl((select datadir from gp_segment_configuration c where c.role='m' a
(exited with code 0)
-- loop while segments come in sync
do $$ begin /* in func */ for i in 1..120 loop /* in func */ if (select count(*) = 2 from gp_segment_configuration where content in (0, 1) and mode = 's' and role = 'p') then /* in func */ return; /* in func */ end if; /* in func */ perform gp_request_fts_probe_scan(); /* in func */ end loop; /* in func */ end; /* in func */ $$;
DO
select wait_until_all_segments_synchronized();
wait_until_all_segments_synchronized
--------------------------------------
OK
(1 row)
!\retcode gprecoverseg -ar;
-- start_ignore
......@@ -204,8 +207,11 @@ DO
(exited with code 0)
-- loop while segments come in sync
do $$ begin /* in func */ for i in 1..120 loop /* in func */ if (select count(*) = 2 from gp_segment_configuration where content in (0, 1) and mode = 's' and role = 'p') then /* in func */ return; /* in func */ end if; /* in func */ perform gp_request_fts_probe_scan(); /* in func */ end loop; /* in func */ end; /* in func */ $$;
DO
select wait_until_all_segments_synchronized();
wait_until_all_segments_synchronized
--------------------------------------
OK
(1 row)
-- verify no segment is down after recovery
select count(*) from gp_segment_configuration where status = 'd';
......
......@@ -61,7 +61,7 @@ select gp_inject_fault('finish_prepared_start_of_function', 'reset', 2);
Success:
(1 row)
-- loop to reach waiting_reason=replication
0U: select wait_for_replication(200);
0U: select wait_for_replication(1200);
wait_for_replication
----------------------
t
......
......@@ -121,11 +121,8 @@ select reinitialize_standby();
-- end_ignore
-- Sync state between master and standby must be restored at the end.
do $$ begin /* in func */ for i in 1..120 loop /* in func */ if (select count(*) = 1 from pg_stat_replication) then /* in func */ return; /* in func */ end if; /* in func */ perform pg_sleep(0.1); /* in func */ end loop; /* in func */ end; /* in func */ $$;
DO
select application_name, state from pg_stat_replication;
application_name | state
------------------+-----------
gp_walreceiver | streaming
select wait_until_master_standby_insync();
wait_until_master_standby_insync
----------------------------------
OK
(1 row)
......@@ -80,8 +80,11 @@ SELECT count(*) FROM t;
(exited with code 0)
-- loop while segments come in sync
do $$ begin /* in func */ for i in 1..120 loop /* in func */ if (select count(*) = 2 from gp_segment_configuration where content = 1 and mode = 's') then /* in func */ return; /* in func */ end if; /* in func */ perform gp_request_fts_probe_scan(); /* in func */ end loop; /* in func */ end; /* in func */ $$;
DO
select wait_until_all_segments_synchronized();
wait_until_all_segments_synchronized
--------------------------------------
OK
(1 row)
!\retcode gprecoverseg -ar;
-- start_ignore
......@@ -89,8 +92,11 @@ DO
(exited with code 0)
-- loop while segments come in sync
do $$ begin /* in func */ for i in 1..120 loop /* in func */ if (select count(*) = 2 from gp_segment_configuration where content = 1 and mode = 's') then /* in func */ return; /* in func */ end if; /* in func */ perform gp_request_fts_probe_scan(); /* in func */ end loop; /* in func */ end; /* in func */ $$;
DO
select wait_until_all_segments_synchronized();
wait_until_all_segments_synchronized
--------------------------------------
OK
(1 row)
-- verify no segment is down after recovery
1:SELECT COUNT(*) FROM gp_segment_configuration WHERE status = 'd';
......
-- This test assumes 3 primaries and 3 mirrors from a gpdemo segwalrep cluster
-- function to wait for mirror to come up in sync (1 minute timeout)
create or replace function wait_for_streaming(contentid smallint) returns void as $$ declare updated bool; /* in func */ begin /* in func */ for i in 1 .. 120 loop /* in func */ perform gp_request_fts_probe_scan(); /* in func */ select (mode = 's' and status = 'u') into updated /* in func */ from gp_segment_configuration /* in func */ where content = contentid and role = 'm'; /* in func */ exit when updated; /* in func */ perform pg_sleep(0.5); /* in func */ end loop; /* in func */ end; /* in func */ $$ language plpgsql;
CREATE
include: helpers/server_helpers.sql;
CREATE
......@@ -193,10 +188,10 @@ select gp_inject_fault('initialize_wal_sender', 'reset', dbid) from gp_segment_c
-----------------
Success:
(1 row)
select wait_for_streaming(2::smallint);
wait_for_streaming
--------------------
select wait_until_segment_synchronized(2);
wait_until_segment_synchronized
---------------------------------
OK
(1 row)
select content, role, preferred_role, mode, status from gp_segment_configuration where content=2;
content | role | preferred_role | mode | status
......
......@@ -68,8 +68,11 @@ select content, preferred_role, role, status, mode from gp_segment_configuration
(exited with code 0)
-- loop while segments come in sync
do $$ begin /* in func */ for i in 1..120 loop /* in func */ if (select mode = 's' from gp_segment_configuration where content = 0 limit 1) then /* in func */ return; /* in func */ end if; /* in func */ perform gp_request_fts_probe_scan(); /* in func */ end loop; /* in func */ end; /* in func */ $$;
DO
select wait_until_all_segments_synchronized();
wait_until_all_segments_synchronized
--------------------------------------
OK
(1 row)
-- expect: to see roles flipped and in sync
select content, preferred_role, role, status, mode from gp_segment_configuration where content = 0;
......@@ -177,8 +180,11 @@ drop tablespace mirror_promotion_tablespace;
DROP
-- loop while segments come in sync
do $$ begin /* in func */ for i in 1..120 loop /* in func */ if (select mode = 's' from gp_segment_configuration where content = 0 limit 1) then /* in func */ return; /* in func */ end if; /* in func */ perform gp_request_fts_probe_scan(); /* in func */ end loop; /* in func */ end; /* in func */ $$;
DO
select wait_until_all_segments_synchronized();
wait_until_all_segments_synchronized
--------------------------------------
OK
(1 row)
-- now, the content 0 primary and mirror should be at their preferred role
-- and up and in-sync
......
......@@ -116,8 +116,11 @@ SET
(exited with code 0)
-- loop while segments come in sync
do $$ begin /* in func */ for i in 1..120 loop /* in func */ if (select count(*) = 0 from gp_segment_configuration where content != -1 and mode != 's') then /* in func */ return; /* in func */ end if; /* in func */ perform gp_request_fts_probe_scan(); /* in func */ end loop; /* in func */ end; /* in func */ $$;
DO
select wait_until_all_segments_synchronized();
wait_until_all_segments_synchronized
--------------------------------------
OK
(1 row)
-- rebalance the cluster
!\retcode gprecoverseg -ar;
......@@ -126,8 +129,11 @@ DO
(exited with code 0)
-- loop while segments come in sync
do $$ begin /* in func */ for i in 1..120 loop /* in func */ if (select count(*) = 0 from gp_segment_configuration where content != -1 and mode != 's') then /* in func */ return; /* in func */ end if; /* in func */ perform gp_request_fts_probe_scan(); /* in func */ end loop; /* in func */ end; /* in func */ $$;
DO
select wait_until_all_segments_synchronized();
wait_until_all_segments_synchronized
--------------------------------------
OK
(1 row)
-- recheck gp_segment_configuration after rebalance
SELECT dbid, role, preferred_role, content, mode, status FROM gp_segment_configuration order by dbid;
......
......@@ -22,6 +22,9 @@
--
-- end_matchsubs
include: helpers/server_helpers.sql;
CREATE
CREATE extension IF NOT EXISTS gp_inject_fault;
CREATE
1:set dtx_phase2_retry_count=10;
......@@ -238,8 +241,11 @@ DROP
(exited with code 0)
-- loop while segments come in sync
do $$ begin /* in func */ for i in 1..120 loop /* in func */ if (select count(*) = 0 from gp_segment_configuration where content != -1 and mode != 's') then /* in func */ return; /* in func */ end if; /* in func */ perform gp_request_fts_probe_scan(); /* in func */ end loop; /* in func */ end; /* in func */ $$;
DO
select wait_until_all_segments_synchronized();
wait_until_all_segments_synchronized
--------------------------------------
OK
(1 row)
5:SELECT role, preferred_role, content FROM gp_segment_configuration;
role | preferred_role | content
------+----------------+---------
......
......@@ -127,3 +127,41 @@ $$ language plpgsql;
create or replace function master() returns setof gp_segment_configuration as $$
select * from gp_segment_configuration where role='p' and content=-1;
$$ language sql;
-- Polls gp_segment_configuration until no row with content = segment_number
-- has a mode other than 's'. Makes up to 600 checks. After each failed check
-- it sleeps 0.1 second and requests an FTS probe scan.
-- Returns 'OK' as soon as a check passes, 'Fail' once all attempts are used up.
create or replace function wait_until_segment_synchronized(segment_number int) returns text as $$
declare
    attempts_left int := 600;
begin
    while attempts_left > 0 loop
        if not exists (
            select 1
            from gp_segment_configuration
            where content = segment_number
              and mode != 's'
        ) then
            return 'OK';
        end if;

        perform pg_sleep(0.1);
        perform gp_request_fts_probe_scan();
        attempts_left := attempts_left - 1;
    end loop;

    return 'Fail';
end;
$$ language plpgsql;
-- Polls gp_segment_configuration until every row with content != -1 reports
-- mode 's'. Makes up to 600 checks. After each failed check it sleeps
-- 0.1 second and requests an FTS probe scan.
-- Returns 'OK' as soon as a check passes, 'Fail' once all attempts are used up.
create or replace function wait_until_all_segments_synchronized() returns text as $$
declare
    attempt int := 0;
begin
    while attempt < 600 loop
        if not exists (
            select 1
            from gp_segment_configuration
            where content != -1
              and mode != 's'
        ) then
            return 'OK';
        end if;

        perform pg_sleep(0.1);
        perform gp_request_fts_probe_scan();
        attempt := attempt + 1;
    end loop;

    return 'Fail';
end;
$$ language plpgsql;
-- Polls pg_stat_replication until it contains exactly one row. Makes up to
-- 1200 checks, sleeping 0.1 second after each failed check.
-- Returns 'OK' as soon as a check passes, 'Fail' once all attempts are used up.
create or replace function wait_until_master_standby_insync() returns text as $$
declare
    tries int := 0;
    replication_rows bigint;
begin
    while tries < 1200 loop
        select count(*) into replication_rows from pg_stat_replication;
        if replication_rows = 1 then
            return 'OK';
        end if;

        perform pg_sleep(0.1);
        tries := tries + 1;
    end loop;

    return 'Fail';
end;
$$ language plpgsql;
-- Tests FTS can handle DNS error.
create extension if not exists gp_inject_fault;
include: helpers/server_helpers.sql;
-- to make test deterministic and fast
!\retcode gpconfig -c gp_fts_probe_retries -v 2 --masteronly;
......@@ -32,30 +34,12 @@ select count(*) from gp_segment_configuration where status = 'd';
!\retcode gprecoverseg -aF --no-progress;
-- loop while segments come in sync
do $$
begin /* in func */
for i in 1..120 loop /* in func */
if (select mode = 's' from gp_segment_configuration where content = 0 limit 1) then /* in func */
return; /* in func */
end if; /* in func */
perform gp_request_fts_probe_scan(); /* in func */
end loop; /* in func */
end; /* in func */
$$;
select wait_until_all_segments_synchronized();
!\retcode gprecoverseg -ar;
-- loop while segments come in sync
do $$
begin /* in func */
for i in 1..120 loop /* in func */
if (select mode = 's' from gp_segment_configuration where content = 0 limit 1) then /* in func */
return; /* in func */
end if; /* in func */
perform gp_request_fts_probe_scan(); /* in func */
end loop; /* in func */
end; /* in func */
$$;
select wait_until_all_segments_synchronized();
-- verify no segment is down after recovery
select count(*) from gp_segment_configuration where status = 'd';
......
......@@ -129,30 +129,12 @@ where c.role='m' and c.content=0), 'stop');
!\retcode gprecoverseg -aF --no-progress;
-- loop while segments come in sync
do $$
begin /* in func */
for i in 1..120 loop /* in func */
if (select count(*) = 2 from gp_segment_configuration where content in (0, 1) and mode = 's' and role = 'p') then /* in func */
return; /* in func */
end if; /* in func */
perform gp_request_fts_probe_scan(); /* in func */
end loop; /* in func */
end; /* in func */
$$;
select wait_until_all_segments_synchronized();
!\retcode gprecoverseg -ar;
-- loop while segments come in sync
do $$
begin /* in func */
for i in 1..120 loop /* in func */
if (select count(*) = 2 from gp_segment_configuration where content in (0, 1) and mode = 's' and role = 'p') then /* in func */
return; /* in func */
end if; /* in func */
perform gp_request_fts_probe_scan(); /* in func */
end loop; /* in func */
end; /* in func */
$$;
select wait_until_all_segments_synchronized();
-- verify no segment is down after recovery
select count(*) from gp_segment_configuration where status = 'd';
......
......@@ -34,7 +34,7 @@ select gp_inject_fault_infinite('wal_sender_loop', 'suspend', 2);
-- let the transaction move forward with the commit
select gp_inject_fault('finish_prepared_start_of_function', 'reset', 2);
-- loop to reach waiting_reason=replication
0U: select wait_for_replication(200);
0U: select wait_for_replication(1200);
-- hitting this fault, is checked for test validation
select gp_inject_fault_infinite('sync_rep_query_cancel', 'skip', 2);
0U: select pg_cancel_backend(pid) from pg_stat_activity where waiting_reason='replication' and sess_id in (select sess_id from store_session_id);
......
......@@ -98,15 +98,4 @@ select reinitialize_standby();
-- end_ignore
-- Sync state between master and standby must be restored at the end.
do $$
begin /* in func */
for i in 1..120 loop /* in func */
if (select count(*) = 1 from pg_stat_replication) then /* in func */
return; /* in func */
end if; /* in func */
perform pg_sleep(0.1); /* in func */
end loop; /* in func */
end; /* in func */
$$;
select application_name, state from pg_stat_replication;
select wait_until_master_standby_insync();
......@@ -37,30 +37,12 @@ SELECT count(*) FROM t;
!\retcode gprecoverseg -a;
-- loop while segments come in sync
do $$
begin /* in func */
for i in 1..120 loop /* in func */
if (select count(*) = 2 from gp_segment_configuration where content = 1 and mode = 's') then /* in func */
return; /* in func */
end if; /* in func */
perform gp_request_fts_probe_scan(); /* in func */
end loop; /* in func */
end; /* in func */
$$;
select wait_until_all_segments_synchronized();
!\retcode gprecoverseg -ar;
-- loop while segments come in sync
do $$
begin /* in func */
for i in 1..120 loop /* in func */
if (select count(*) = 2 from gp_segment_configuration where content = 1 and mode = 's') then /* in func */
return; /* in func */
end if; /* in func */
perform gp_request_fts_probe_scan(); /* in func */
end loop; /* in func */
end; /* in func */
$$;
select wait_until_all_segments_synchronized();
-- verify no segment is down after recovery
1:SELECT COUNT(*) FROM gp_segment_configuration WHERE status = 'd';
......
-- This test assumes 3 primaries and 3 mirrors from a gpdemo segwalrep cluster
-- function to wait for mirror to come up in sync (1 minute timeout)
create or replace function wait_for_streaming(contentid smallint)
returns void as $$
declare
updated bool; /* in func */
begin /* in func */
for i in 1 .. 120 loop /* in func */
perform gp_request_fts_probe_scan(); /* in func */
select (mode = 's' and status = 'u') into updated /* in func */
from gp_segment_configuration /* in func */
where content = contentid and role = 'm'; /* in func */
exit when updated; /* in func */
perform pg_sleep(0.5); /* in func */
end loop; /* in func */
end; /* in func */
$$ language plpgsql;
include: helpers/server_helpers.sql;
......@@ -96,7 +78,7 @@ select content, role, preferred_role, mode, status from gp_segment_configuration
-- let the walsender proceed
select gp_inject_fault('initialize_wal_sender', 'reset', dbid)
from gp_segment_configuration where role='p' and content=2;
select wait_for_streaming(2::smallint);
select wait_until_segment_synchronized(2);
select content, role, preferred_role, mode, status from gp_segment_configuration where content=2;
-- everything is back to normal
......
......@@ -38,16 +38,7 @@ where content = 0;
!\retcode gprecoverseg -aF --no-progress;
-- loop while segments come in sync
do $$
begin /* in func */
for i in 1..120 loop /* in func */
if (select mode = 's' from gp_segment_configuration where content = 0 limit 1) then /* in func */
return; /* in func */
end if; /* in func */
perform gp_request_fts_probe_scan(); /* in func */
end loop; /* in func */
end; /* in func */
$$;
select wait_until_all_segments_synchronized();
-- expect: to see roles flipped and in sync
select content, preferred_role, role, status, mode
......@@ -100,16 +91,7 @@ drop table mirror_promotion_tblspc_heap_table;
drop tablespace mirror_promotion_tablespace;
-- loop while segments come in sync
do $$
begin /* in func */
for i in 1..120 loop /* in func */
if (select mode = 's' from gp_segment_configuration where content = 0 limit 1) then /* in func */
return; /* in func */
end if; /* in func */
perform gp_request_fts_probe_scan(); /* in func */
end loop; /* in func */
end; /* in func */
$$;
select wait_until_all_segments_synchronized();
-- now, the content 0 primary and mirror should be at their preferred role
-- and up and in-sync
......
......@@ -77,31 +77,13 @@ set allow_system_table_mods to false;
!\retcode gprecoverseg -a;
-- loop while segments come in sync
do $$
begin /* in func */
for i in 1..120 loop /* in func */
if (select count(*) = 0 from gp_segment_configuration where content != -1 and mode != 's') then /* in func */
return; /* in func */
end if; /* in func */
perform gp_request_fts_probe_scan(); /* in func */
end loop; /* in func */
end; /* in func */
$$;
select wait_until_all_segments_synchronized();
-- rebalance the cluster
!\retcode gprecoverseg -ar;
-- loop while segments come in sync
do $$
begin /* in func */
for i in 1..120 loop /* in func */
if (select count(*) = 0 from gp_segment_configuration where content != -1 and mode != 's') then /* in func */
return; /* in func */
end if; /* in func */
perform gp_request_fts_probe_scan(); /* in func */
end loop; /* in func */
end; /* in func */
$$;
select wait_until_all_segments_synchronized();
-- recheck gp_segment_configuration after rebalance
SELECT dbid, role, preferred_role, content, mode, status FROM gp_segment_configuration order by dbid;
......
......@@ -22,6 +22,8 @@
--
-- end_matchsubs
include: helpers/server_helpers.sql;
CREATE extension IF NOT EXISTS gp_inject_fault;
1:set dtx_phase2_retry_count=10;
!\retcode gpconfig -c gp_fts_probe_retries -v 2 --masteronly;
......@@ -107,14 +109,5 @@ CREATE extension IF NOT EXISTS gp_inject_fault;
!\retcode gpstop -u;
-- loop while segments come in sync
do $$
begin /* in func */
for i in 1..120 loop /* in func */
if (select count(*) = 0 from gp_segment_configuration where content != -1 and mode != 's') then /* in func */
return; /* in func */
end if; /* in func */
perform gp_request_fts_probe_scan(); /* in func */
end loop; /* in func */
end; /* in func */
$$;
select wait_until_all_segments_synchronized();
5:SELECT role, preferred_role, content FROM gp_segment_configuration;
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册