From 96b332c0dbfc5bbac113565f9e40b8431b574da7 Mon Sep 17 00:00:00 2001 From: Alexandra Wang Date: Tue, 12 Feb 2019 18:22:28 -0800 Subject: [PATCH] Test mirror PANICs immediately when file is missing in conistent state Mirror should PANIC immediately when recovery is in consistent state but there are existing invalid page entries in invalid_page hash table. This makes sure to not delay the checking till mirror is promoted and helps catching the file missing problems on mirror sooner. The logic of mirror immediate PANIC was introduced in upstream commit 1e616f63. Since, no test exist for this in upstream and logic is used by AO tables as well, this patch is adding test to validate the stated behavior. Co-authored-by: Ashwin Agrawal --- src/test/regress/expected/mirror_replay.out | 110 ++++++++++++++++++++ src/test/regress/sql/mirror_replay.sql | 91 ++++++++++++++++ 2 files changed, 201 insertions(+) create mode 100644 src/test/regress/expected/mirror_replay.out create mode 100644 src/test/regress/sql/mirror_replay.sql diff --git a/src/test/regress/expected/mirror_replay.out b/src/test/regress/expected/mirror_replay.out new file mode 100644 index 0000000000..21bd9d6143 --- /dev/null +++ b/src/test/regress/expected/mirror_replay.out @@ -0,0 +1,110 @@ +-- Scenario for validating mirror PANIC immediately on truncate record replay +-- if file is missing. +-- start_ignore +create language plpythonu; +-- end_ignore +-- helper functions +CREATE FUNCTION rel_filepaths_on_segments(regclass) RETURNS TABLE(gp_segment_id int, filepath text) +STRICT STABLE LANGUAGE SQL AS + $fn$ + SELECT gp_execution_segment(), pg_relation_filepath($1) AS filepath + $fn$ +EXECUTE ON ALL SEGMENTS; +CREATE or REPLACE FUNCTION change_file_permission_readonly(path text) + RETURNS void as +$$ + import os, stat + + if not os.path.isfile(path): + plpy.info('skipping non-existent file %s' % (path)) + else: + plpy.info('changing file permission to readonly') + os.chmod(path, stat.S_IRUSR); +$$ LANGUAGE plpythonu; +CREATE or REPLACE FUNCTION wait_for_replication_replay (retries int) RETURNS bool AS +$$ +declare + i int; + result bool; +begin + i := 0; + -- Wait until all mirrors has replayed up to flush location + loop + SELECT flush_location = replay_location INTO result from gp_stat_replication where gp_segment_id = 0; + if result then + return true; + end if; + + if i >= retries then + return false; + end if; + perform pg_sleep(0.1); + i := i + 1; + end loop; +end; +$$ language plpgsql; +CREATE TABLE mirror_reply_test_table (a INT, b INT, c CHAR(20)) WITH (appendonly=true); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +INSERT INTO mirror_reply_test_table SELECT i AS a, 1 AS b, 'hello world' AS c FROM generate_series(1, 10) AS i; +-- need to wait till mirror creates the file on disk +SELECT wait_for_replication_replay(5000); + wait_for_replication_replay +----------------------------- + t +(1 row) + +-- step to inject fault to make sure truncate xlog replay cannot open the file in write mode +SELECT change_file_permission_readonly(datadir || '/' || + (SELECT filepath FROM rel_filepaths_on_segments('mirror_reply_test_table') + WHERE gp_segment_id = 0) || '.1') + FROM gp_segment_configuration WHERE content = 0 AND role = 'm'; +INFO: changing file permission to readonly +CONTEXT: PL/Python function "change_file_permission_readonly" + change_file_permission_readonly +--------------------------------- + +(1 row) + +-- generate truncate xlog record with non-zero EOF +VACUUM mirror_reply_test_table; +-- expecting mirror to be marked down +do $$ +begin + for i in 1..120 loop + if (select status = 'd' from gp_segment_configuration where content=0 and role = 'm') then + return; + end if; + perform gp_request_fts_probe_scan(); + end loop; +end; +$$; +SELECT role, preferred_role, mode, status FROM gp_segment_configuration WHERE content=0; + role | preferred_role | mode | status +------+----------------+------+-------- + p | p | n | u + m | m | n | d +(2 rows) + +-- post test cleanup +-- start_ignore +\! gprecoverseg -aF; +-- end_ignore +-- loop while segments come in sync +do $$ +begin + for i in 1..120 loop + if (select mode = 's' from gp_segment_configuration where content=0 and role = 'p') then + return; + end if; + perform gp_request_fts_probe_scan(); + end loop; +end; +$$; +SELECT role, preferred_role, mode, status FROM gp_segment_configuration WHERE content=0; + role | preferred_role | mode | status +------+----------------+------+-------- + p | p | s | u + m | m | s | u +(2 rows) + diff --git a/src/test/regress/sql/mirror_replay.sql b/src/test/regress/sql/mirror_replay.sql new file mode 100644 index 0000000000..e3393f3b31 --- /dev/null +++ b/src/test/regress/sql/mirror_replay.sql @@ -0,0 +1,91 @@ +-- Scenario for validating mirror PANIC immediately on truncate record replay +-- if file is missing. +-- start_ignore +create language plpythonu; +-- end_ignore + +-- helper functions +CREATE FUNCTION rel_filepaths_on_segments(regclass) RETURNS TABLE(gp_segment_id int, filepath text) +STRICT STABLE LANGUAGE SQL AS + $fn$ + SELECT gp_execution_segment(), pg_relation_filepath($1) AS filepath + $fn$ +EXECUTE ON ALL SEGMENTS; + +CREATE or REPLACE FUNCTION change_file_permission_readonly(path text) + RETURNS void as +$$ + import os, stat + + if not os.path.isfile(path): + plpy.info('skipping non-existent file %s' % (path)) + else: + plpy.info('changing file permission to readonly') + os.chmod(path, stat.S_IRUSR); +$$ LANGUAGE plpythonu; + +CREATE or REPLACE FUNCTION wait_for_replication_replay (retries int) RETURNS bool AS +$$ +declare + i int; + result bool; +begin + i := 0; + -- Wait until all mirrors has replayed up to flush location + loop + SELECT flush_location = replay_location INTO result from gp_stat_replication where gp_segment_id = 0; + if result then + return true; + end if; + + if i >= retries then + return false; + end if; + perform pg_sleep(0.1); + i := i + 1; + end loop; +end; +$$ language plpgsql; + +CREATE TABLE mirror_reply_test_table (a INT, b INT, c CHAR(20)) WITH (appendonly=true); +INSERT INTO mirror_reply_test_table SELECT i AS a, 1 AS b, 'hello world' AS c FROM generate_series(1, 10) AS i; +-- need to wait till mirror creates the file on disk +SELECT wait_for_replication_replay(5000); +-- step to inject fault to make sure truncate xlog replay cannot open the file in write mode +SELECT change_file_permission_readonly(datadir || '/' || + (SELECT filepath FROM rel_filepaths_on_segments('mirror_reply_test_table') + WHERE gp_segment_id = 0) || '.1') + FROM gp_segment_configuration WHERE content = 0 AND role = 'm'; +-- generate truncate xlog record with non-zero EOF +VACUUM mirror_reply_test_table; + +-- expecting mirror to be marked down +do $$ +begin + for i in 1..120 loop + if (select status = 'd' from gp_segment_configuration where content=0 and role = 'm') then + return; + end if; + perform gp_request_fts_probe_scan(); + end loop; +end; +$$; +SELECT role, preferred_role, mode, status FROM gp_segment_configuration WHERE content=0; + +-- post test cleanup +-- start_ignore +\! gprecoverseg -aF; +-- end_ignore + +-- loop while segments come in sync +do $$ +begin + for i in 1..120 loop + if (select mode = 's' from gp_segment_configuration where content=0 and role = 'p') then + return; + end if; + perform gp_request_fts_probe_scan(); + end loop; +end; +$$; +SELECT role, preferred_role, mode, status FROM gp_segment_configuration WHERE content=0; -- GitLab