提交 909fbc66 编写于 作者: J Jacob Champion 提交者: Xin Zhang

Add mirror_promotion test for failover and back

The test uses \retcode to ignore the output of the external script while
still recording its return code in the expected result.

This is very useful for this scenario: we don't care about the details of
the recoverfull operation, but we do care that the operation completes
successfully.

The test stops the primary and triggers failover to its mirror, then fails
back to the original segment configuration. At the end of the test,
gp_segment_configuration is the same as at the beginning of the test.

Author: Xin Zhang <xzhang@pivotal.io>
Author: Jacob Champion <pchampion@pivotal.io>
Author: Ashwin Agrawal <aagrawal@pivotal.io>
上级 4c82a3dc
-- start_ignore
create language plpythonu;
CREATE
-- end_ignore
create or replace function pg_ctl(datadir text, command text, port int, contentid int) returns text as $$ import subprocess
cmd = 'pg_ctl -D %s ' % datadir if command in ('stop', 'restart'): cmd = cmd + '-w -m immediate %s' % command elif command == 'start': opts = '-p %d -\-gp_dbid=0 -\-silent-mode=true -i -M mirrorless -\-gp_contentid=%d -\-gp_num_contents_in_cluster=3' % (port, contentid) cmd = cmd + '-o "%s" start' % opts elif command == 'reload': cmd = cmd + 'reload' else: return 'Invalid command input'
return subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=True).replace('.', '') $$ language plpythonu;
CREATE
-- stop a primary in order to trigger a mirror promotion
select pg_ctl((select datadir from gp_segment_configuration c where c.role='p' and c.content=0), 'stop', NULL, NULL);
pg_ctl
----------------------------------------------------
waiting for server to shut down done
server stopped
(1 row)
-- trigger failover
select gp_request_fts_probe_scan();
gp_request_fts_probe_scan
-------------------------
t
(1 row)
-- expect: to see the content 0, preferred primary is mirror and it's down
-- the preferred mirror is primary and it's up and not-in-sync
select content, preferred_role, role, status, mode from gp_segment_configuration where content = 0;
content|preferred_role|role|status|mode
-------+--------------+----+------+----
0 |p |m |d |n
0 |m |p |u |n
(2 rows)
-- wait for dbid 5 (mirror for content 0) to finish the promotion
5U: select 1;
?column?
--------
1
(1 row)
-- fully recover the failed primary as new mirror
!\retcode ../../../gpAux/gpdemo/gpsegwalrep.py recoverfull;
-- start_ignore
2018-01-08 16:00:58.097007: fetching cluster configuration
2018-01-08 16:00:58.104765: fetched cluster configuration
found 3 distinct content IDs
Mirror content 0: 2018-01-08 16:00:58.129024: Running command... rm -rf /home/xzhang/workspace/gpdb/gpAux/gpdemo/datadirs/dbfast1/demoDataDir0
Mirror content 0:
Mirror content 0: 2018-01-08 16:00:58.174251: Running command... pg_basebackup -x -R -c fast -E ./pg_log -E ./db_dumps -E ./gpperfmon/data -E ./gpperfmon/logs -D /home/xzhang/workspace/gpdb/gpAux/gpdemo/datadirs/dbfast1/demoDataDir0 -h localhost -p 25435
Mirror content 0: NOTICE: WAL archiving is not enabled; you must ensure that all required WAL segments are copied through other means to complete the backup
Mirror content 0:
Mirror content 0: 2018-01-08 16:00:58.804815: Running command... mkdir /home/xzhang/workspace/gpdb/gpAux/gpdemo/datadirs/dbfast1/demoDataDir0/pg_log; mkdir /home/xzhang/workspace/gpdb/gpAux/gpdemo/datadirs/dbfast1/demoDataDir0/pg_xlog/archive_status
Mirror content 0:
Mirror content 0: 2018-01-08 16:00:58.812926: Initialized mirror at /home/xzhang/workspace/gpdb/gpAux/gpdemo/datadirs/dbfast1/demoDataDir0
Segment primary content 0: 2018-01-08 16:00:58.813229: Running command... pg_ctl -D /home/xzhang/workspace/gpdb/gpAux/gpdemo/datadirs/dbfast1/demoDataDir0 -o '-p 25432 --gp_dbid=0 --silent-mode=true -i -M mirrorless --gp_contentid=0 --gp_num_contents_in_cluster=3' start
Segment primary content 0: server starting
Segment primary content 0:
Segment primary content 0: 2018-01-08 16:00:58.846892: Running command... pg_ctl -D /home/xzhang/workspace/gpdb/gpAux/gpdemo/datadirs/dbfast1/demoDataDir0 status
Segment primary content 0: pg_ctl: server is running (PID: 4006)
Segment primary content 0:
Segment primary content 0: 2018-01-08 16:00:58.849761: Started primary segment with content 0 and port 25432 at /home/xzhang/workspace/gpdb/gpAux/gpdemo/datadirs/dbfast1/demoDataDir0
Force FTS probe scan: 2018-01-08 16:00:58.849919: Running command... psql postgres -c "SELECT gp_request_fts_probe_scan()"
Force FTS probe scan: gp_request_fts_probe_scan
Force FTS probe scan: ---------------------------
Force FTS probe scan: t
Force FTS probe scan: (1 row)
Force FTS probe scan:
Force FTS probe scan:
Force FTS probe scan: 2018-01-08 16:00:59.914176: FTS probe refreshed catalog
2018-01-08 16:00:59.914229: fetching cluster configuration
2018-01-08 16:00:59.924334: fetched cluster configuration
found 3 distinct content IDs
: 2018-01-08 16:00:59.937801: Running command... psql postgres -c "select * from gp_segment_configuration order by content, dbid"
: dbid | content | role | preferred_role | mode | status | port | hostname | address | datadir
: ------+---------+------+----------------+------+--------+-------+-------------+-------------+-------------------------------------------------------------------------------
: 1 | -1 | p | p | n | u | 15432 | xzhang-iMac | xzhang-iMac | /home/xzhang/workspace/gpdb/gpAux/gpdemo/datadirs/qddir/demoDataDir-1
: 8 | -1 | m | m | s | u | 16432 | xzhang-iMac | xzhang-iMac | /home/xzhang/workspace/gpdb/gpAux/gpdemo/datadirs/standby
: 2 | 0 | m | p | s | u | 25432 | xzhang-iMac | xzhang-iMac | /home/xzhang/workspace/gpdb/gpAux/gpdemo/datadirs/dbfast1/demoDataDir0
: 5 | 0 | p | m | s | u | 25435 | xzhang-iMac | xzhang-iMac | /home/xzhang/workspace/gpdb/gpAux/gpdemo/datadirs/dbfast_mirror1/demoDataDir0
: 3 | 1 | p | p | s | u | 25433 | xzhang-iMac | xzhang-iMac | /home/xzhang/workspace/gpdb/gpAux/gpdemo/datadirs/dbfast2/demoDataDir1
: 6 | 1 | m | m | s | u | 25436 | xzhang-iMac | xzhang-iMac | /home/xzhang/workspace/gpdb/gpAux/gpdemo/datadirs/dbfast_mirror2/demoDataDir1
: 4 | 2 | p | p | s | u | 25434 | xzhang-iMac | xzhang-iMac | /home/xzhang/workspace/gpdb/gpAux/gpdemo/datadirs/dbfast3/demoDataDir2
: 7 | 2 | m | m | s | u | 25437 | xzhang-iMac | xzhang-iMac | /home/xzhang/workspace/gpdb/gpAux/gpdemo/datadirs/dbfast_mirror3/demoDataDir2
: (8 rows)
:
:
: 2018-01-08 16:00:59.950212:
-- end_ignore
(exited with code 0)
-- expect: to see the new rebuilt mirror up and in sync
select content, preferred_role, role, status, mode from gp_segment_configuration where content = 0;
content|preferred_role|role|status|mode
-------+--------------+----+------+----
0 |m |p |u |s
0 |p |m |u |s
(2 rows)
-- now, let's stop the new primary, so that we can restore original role
select pg_ctl((select datadir from gp_segment_configuration c where c.role='p' and c.content=0), 'stop', NULL, NULL);
pg_ctl
----------------------------------------------------
waiting for server to shut down done
server stopped
(1 row)
-- trigger failover
select gp_request_fts_probe_scan();
gp_request_fts_probe_scan
-------------------------
t
(1 row)
-- expect segments restored back to its preferred role, but mirror is down
select content, preferred_role, role, status, mode from gp_segment_configuration where content = 0;
content|preferred_role|role|status|mode
-------+--------------+----+------+----
0 |m |m |d |n
0 |p |p |u |n
(2 rows)
-- wait for dbid 2 (primary for content 0) finish promotion
2U: select 1;
?column?
--------
1
(1 row)
-- now, let's fully recover the mirror
!\retcode ../../../gpAux/gpdemo/gpsegwalrep.py recoverfull;
-- start_ignore
2018-01-08 16:01:06.212959: fetching cluster configuration
2018-01-08 16:01:06.218760: fetched cluster configuration
found 3 distinct content IDs
Mirror content 0: 2018-01-08 16:01:06.233588: Running command... rm -rf /home/xzhang/workspace/gpdb/gpAux/gpdemo/datadirs/dbfast_mirror1/demoDataDir0
Mirror content 0:
Mirror content 0: 2018-01-08 16:01:06.288272: Running command... pg_basebackup -x -R -c fast -E ./pg_log -E ./db_dumps -E ./gpperfmon/data -E ./gpperfmon/logs -D /home/xzhang/workspace/gpdb/gpAux/gpdemo/datadirs/dbfast_mirror1/demoDataDir0 -h localhost -p 25432
Mirror content 0: NOTICE: WAL archiving is not enabled; you must ensure that all required WAL segments are copied through other means to complete the backup
Mirror content 0:
Mirror content 0: 2018-01-08 16:01:06.726959: Running command... mkdir /home/xzhang/workspace/gpdb/gpAux/gpdemo/datadirs/dbfast_mirror1/demoDataDir0/pg_log; mkdir /home/xzhang/workspace/gpdb/gpAux/gpdemo/datadirs/dbfast_mirror1/demoDataDir0/pg_xlog/archive_status
Mirror content 0:
Mirror content 0: 2018-01-08 16:01:06.732329: Initialized mirror at /home/xzhang/workspace/gpdb/gpAux/gpdemo/datadirs/dbfast_mirror1/demoDataDir0
Segment mirror content 0: 2018-01-08 16:01:06.732833: Running command... pg_ctl -D /home/xzhang/workspace/gpdb/gpAux/gpdemo/datadirs/dbfast_mirror1/demoDataDir0 -o '-p 25435 --gp_dbid=0 --silent-mode=true -i -M mirrorless --gp_contentid=0 --gp_num_contents_in_cluster=3' start
Segment mirror content 0: server starting
Segment mirror content 0:
Segment mirror content 0: 2018-01-08 16:01:06.765965: Running command... pg_ctl -D /home/xzhang/workspace/gpdb/gpAux/gpdemo/datadirs/dbfast_mirror1/demoDataDir0 status
Segment mirror content 0: pg_ctl: server is running (PID: 4285)
Segment mirror content 0:
Segment mirror content 0: 2018-01-08 16:01:06.769486: Started mirror segment with content 0 and port 25435 at /home/xzhang/workspace/gpdb/gpAux/gpdemo/datadirs/dbfast_mirror1/demoDataDir0
Force FTS probe scan: 2018-01-08 16:01:06.769688: Running command... psql postgres -c "SELECT gp_request_fts_probe_scan()"
Force FTS probe scan: gp_request_fts_probe_scan
Force FTS probe scan: ---------------------------
Force FTS probe scan: t
Force FTS probe scan: (1 row)
Force FTS probe scan:
Force FTS probe scan:
Force FTS probe scan: 2018-01-08 16:01:07.836626: FTS probe refreshed catalog
2018-01-08 16:01:07.836704: fetching cluster configuration
2018-01-08 16:01:07.846714: fetched cluster configuration
found 3 distinct content IDs
: 2018-01-08 16:01:07.859669: Running command... psql postgres -c "select * from gp_segment_configuration order by content, dbid"
: dbid | content | role | preferred_role | mode | status | port | hostname | address | datadir
: ------+---------+------+----------------+------+--------+-------+-------------+-------------+-------------------------------------------------------------------------------
: 1 | -1 | p | p | n | u | 15432 | xzhang-iMac | xzhang-iMac | /home/xzhang/workspace/gpdb/gpAux/gpdemo/datadirs/qddir/demoDataDir-1
: 8 | -1 | m | m | s | u | 16432 | xzhang-iMac | xzhang-iMac | /home/xzhang/workspace/gpdb/gpAux/gpdemo/datadirs/standby
: 2 | 0 | p | p | s | u | 25432 | xzhang-iMac | xzhang-iMac | /home/xzhang/workspace/gpdb/gpAux/gpdemo/datadirs/dbfast1/demoDataDir0
: 5 | 0 | m | m | s | u | 25435 | xzhang-iMac | xzhang-iMac | /home/xzhang/workspace/gpdb/gpAux/gpdemo/datadirs/dbfast_mirror1/demoDataDir0
: 3 | 1 | p | p | s | u | 25433 | xzhang-iMac | xzhang-iMac | /home/xzhang/workspace/gpdb/gpAux/gpdemo/datadirs/dbfast2/demoDataDir1
: 6 | 1 | m | m | s | u | 25436 | xzhang-iMac | xzhang-iMac | /home/xzhang/workspace/gpdb/gpAux/gpdemo/datadirs/dbfast_mirror2/demoDataDir1
: 4 | 2 | p | p | s | u | 25434 | xzhang-iMac | xzhang-iMac | /home/xzhang/workspace/gpdb/gpAux/gpdemo/datadirs/dbfast3/demoDataDir2
: 7 | 2 | m | m | s | u | 25437 | xzhang-iMac | xzhang-iMac | /home/xzhang/workspace/gpdb/gpAux/gpdemo/datadirs/dbfast_mirror3/demoDataDir2
: (8 rows)
:
:
: 2018-01-08 16:01:07.873428:
-- end_ignore
(exited with code 0)
-- now, the content 0 primary and mirror should be at their preferred role
-- and up and in-sync
select content, preferred_role, role, status, mode from gp_segment_configuration where content = 0;
content|preferred_role|role|status|mode
-------+--------------+----+------+----
0 |p |p |u |s
0 |m |m |u |s
(2 rows)
......@@ -107,3 +107,4 @@ test: vacuum_after_vacuum_skip_drop_column
# Tests for FTS
test: segwalrep/commit_blocking
test: segwalrep/fts_unblock_primary
test: segwalrep/mirror_promotion
-- start_ignore
create language plpythonu;
-- end_ignore
-- Test helper: run the pg_ctl binary against a segment data directory and
-- return its captured output (stdout plus stderr) with every '.' removed.
create or replace function pg_ctl(datadir text, command text, port int, contentid int)
returns text as $$
    # Accepted commands: 'stop' and 'restart' (run with -w -m immediate),
    # 'start' (the only one that reads port and contentid) and 'reload'.
    # Any other value returns 'Invalid command input' without running pg_ctl.
    # NOTE(review): the backslash in each '-\-' option is presumably there so
    # the test harness does not treat a double dash as a SQL comment. Confirm.
    import subprocess
    base = 'pg_ctl -D %s ' % datadir
    if command == 'start':
        server_opts = '-p %d -\-gp_dbid=0 -\-silent-mode=true -i -M mirrorless -\-gp_contentid=%d -\-gp_num_contents_in_cluster=3' % (port, contentid)
        shell_cmd = base + '-o "%s" start' % server_opts
    elif command == 'reload':
        shell_cmd = base + 'reload'
    elif command in ('stop', 'restart'):
        shell_cmd = base + '-w -m immediate %s' % command
    else:
        return 'Invalid command input'
    # check_output raises if pg_ctl exits non-zero. stderr is merged into the
    # captured text. Dots are stripped, presumably because the number of
    # progress dots pg_ctl prints varies between runs (TODO confirm).
    captured = subprocess.check_output(shell_cmd, stderr=subprocess.STDOUT, shell=True)
    return captured.replace('.', '')
$$ language plpythonu;
-- Step 1: stop the acting primary of content 0 so that the next FTS probe
-- promotes its mirror.
select pg_ctl((select datadir from gp_segment_configuration c
where c.role='p' and c.content=0), 'stop', NULL, NULL);
-- Request an FTS probe scan to trigger the failover.
select gp_request_fts_probe_scan();
-- Expect for content 0: the preferred primary now has role mirror and is down,
-- the preferred mirror now has role primary, is up, and is not in sync.
select content, preferred_role, role, status, mode
from gp_segment_configuration
where content = 0;
-- Wait for dbid 5 (the mirror for content 0) to finish its promotion.
5U: select 1;
-- Step 2: fully recover the failed primary as the new mirror. The retcode
-- directive makes the test ignore the script's output and check only its
-- exit code.
!\retcode ../../../gpAux/gpdemo/gpsegwalrep.py recoverfull;
-- Expect: the rebuilt mirror is up and both segments of content 0 are in sync,
-- still in swapped roles.
select content, preferred_role, role, status, mode
from gp_segment_configuration
where content = 0;
-- Step 3: stop the acting primary again so that the segments fail back to
-- their original roles.
select pg_ctl((select datadir from gp_segment_configuration c
where c.role='p' and c.content=0), 'stop', NULL, NULL);
-- Request an FTS probe scan to trigger the second failover.
select gp_request_fts_probe_scan();
-- Expect: both segments are back in their preferred roles, but the mirror is
-- down and the pair is not in sync.
select content, preferred_role, role, status, mode
from gp_segment_configuration
where content = 0;
-- Wait for dbid 2 (the preferred primary for content 0) to finish its promotion.
2U: select 1;
-- Step 4: fully recover the mirror. As above, only the exit code is checked.
!\retcode ../../../gpAux/gpdemo/gpsegwalrep.py recoverfull;
-- Final state: the content 0 primary and mirror are in their preferred roles,
-- up, and in sync, matching the configuration at the start of the test.
select content, preferred_role, role, status, mode
from gp_segment_configuration
where content = 0;
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册