提交 ab0f3296 编写于 作者: A Asim R P 提交者: Asim RP

Test case for DTM retry error handling

The test hits the PG_CATCH() block in DTM retry logic.  It uncovers a
bug in that part of the code, leading to PANIC due to
ERRORDATA_STACK_SIZE exceeded.

The upper limit on dtx_phase2_retry_count is increased from 10 to 15.
This keeps the test simple: it can set the retry count to 11 without
hitting the PANIC that is raised when the max retry count is reached.
上级 49fc2332
......@@ -4379,7 +4379,7 @@ struct config_int ConfigureNamesInt_gp[] =
GUC_SUPERUSER_ONLY | GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE | GUC_GPDB_ADDOPT
},
&dtx_phase2_retry_count,
2, 0, 10,
2, 0, 15,
NULL, NULL, NULL
},
......
-- Check if retry logic handles errors correctly. The retry logic had
-- a bug where error state wasn't cleaned up correctly during retries,
-- leading to PANIC due to ERRORDATA_STACK_SIZE exceeded.
set dtx_phase2_retry_count = 11;
-- Set a fault on one of the QEs such that an error is raised exactly
-- 10 times at the beginning of 2nd phase of 2PC.
-- ERRORDATA_STACK_SIZE is defined as 10. By erroring out 10 times,
-- the error handling logic in QD gets invoked 10 times. If error
-- state is not cleaned up before each retry, it suffices to return
-- an error 10 times to hit the above mentioned PANIC.
--
-- COMMIT_PREPARED
--
create extension if not exists gp_inject_fault;
-- NOTE(review): the 1 and 10 arguments presumably bound the fault to its
-- 1st through 10th occurrence; confirm against gp_inject_fault's signature.
select gp_inject_fault('finish_prepared_start_of_function', 'error', '', '', '', 1, 10, 0, dbid)
from gp_segment_configuration where role = 'p' and status = 'u' and content = 1;
NOTICE: Success:
gp_inject_fault
-----------------
t
(1 row)
begin;
create table dtm_retry_table (a int) distributed by (a);
-- Expected behavior is QD makes 11 attempts to commit and the last
-- one succeeds.
end;
WARNING: the distributed transaction 'Commit Prepared' broadcast failed to one or more segments for gid DUMMY
NOTICE: Releasing segworker group to retry broadcast.
WARNING: the distributed transaction 'Commit Prepared' broadcast failed to one or more segments for gid DUMMY
NOTICE: Releasing segworker group to retry broadcast.
WARNING: the distributed transaction 'Commit Prepared' broadcast failed to one or more segments for gid DUMMY
NOTICE: Releasing segworker group to retry broadcast.
WARNING: the distributed transaction 'Commit Prepared' broadcast failed to one or more segments for gid DUMMY
NOTICE: Releasing segworker group to retry broadcast.
WARNING: the distributed transaction 'Commit Prepared' broadcast failed to one or more segments for gid DUMMY
NOTICE: Releasing segworker group to retry broadcast.
WARNING: the distributed transaction 'Commit Prepared' broadcast failed to one or more segments for gid DUMMY
NOTICE: Releasing segworker group to retry broadcast.
WARNING: the distributed transaction 'Commit Prepared' broadcast failed to one or more segments for gid DUMMY
NOTICE: Releasing segworker group to retry broadcast.
WARNING: the distributed transaction 'Commit Prepared' broadcast failed to one or more segments for gid DUMMY
NOTICE: Releasing segworker group to retry broadcast.
WARNING: the distributed transaction 'Commit Prepared' broadcast failed to one or more segments for gid DUMMY
NOTICE: Releasing segworker group to retry broadcast.
WARNING: the distributed transaction 'Commit Prepared' broadcast failed to one or more segments for gid DUMMY
NOTICE: Releasing segworker group to retry broadcast.
-- Reset all faults.
select gp_inject_fault_infinite('all', 'reset', dbid) from gp_segment_configuration;
NOTICE: Success:
gp_inject_fault_infinite
--------------------------
t
t
t
t
t
t
t
t
(8 rows)
-- Verify that the table got created properly on all segments.
insert into dtm_retry_table select * from generate_series(1,12);
--
-- ABORT_SOME_PREPARED
--
-- Let content 0 primary error out during prepare. This leads to
-- abort_some_prepared broadcast in 2nd phase.
select gp_inject_fault('start_prepare', 'error', dbid)
from gp_segment_configuration where role = 'p' and status = 'u' and content = 0;
NOTICE: Success:
gp_inject_fault
-----------------
t
(1 row)
-- Let content 1 primary, which had successfully prepared the
-- transaction, report error 10 times upon receiving
-- abort_some_prepared request.
select gp_inject_fault('finish_prepared_start_of_function', 'error', '', '', '', 1, 10, 0, dbid)
from gp_segment_configuration where role = 'p' and status = 'u' and content = 1;
NOTICE: Success:
gp_inject_fault
-----------------
t
(1 row)
begin;
truncate table dtm_retry_table;
-- Expected behavior: QD's 11th attempt to broadcast
-- abort_some_prepared message succeeds.
-- The transaction itself still fails with the injected 'start_prepare'
-- error, so the truncate is rolled back.
end;
WARNING: the distributed transaction 'Abort [Prepared]' broadcast failed to one or more segments for gid = 1532027571-0000000036. Retrying ... try 0
NOTICE: Releasing segworker groups to retry broadcast.
NOTICE: Releasing segworker groups to retry broadcast.
NOTICE: Releasing segworker groups to retry broadcast.
NOTICE: Releasing segworker groups to retry broadcast.
NOTICE: Releasing segworker groups to retry broadcast.
NOTICE: Releasing segworker groups to retry broadcast.
NOTICE: Releasing segworker groups to retry broadcast.
NOTICE: Releasing segworker groups to retry broadcast.
NOTICE: Releasing segworker groups to retry broadcast.
ERROR: fault triggered, fault name:'start_prepare' fault type:'error'
-- Verify that the table wasn't truncated due to previous transaction
-- being aborted.
select count(*) = 12 from dtm_retry_table;
?column?
----------
t
(1 row)
-- Reset all faults.
select gp_inject_fault_infinite('all', 'reset', dbid) from gp_segment_configuration;
NOTICE: Success:
gp_inject_fault_infinite
--------------------------
t
t
t
t
t
t
t
t
(8 rows)
--
-- ABORT_PREPARED
--
-- After successful prepare, QD should error out, leading to
-- abort_prepared broadcast in 2nd phase.
select gp_inject_fault('dtm_broadcast_prepare', 'error', dbid)
from gp_segment_configuration where role = 'p' and content = -1;
NOTICE: Success:
gp_inject_fault
-----------------
t
(1 row)
-- Let content 0 primary error out 10 times during second phase.
select gp_inject_fault('finish_prepared_start_of_function', 'error', '', '', '', 1, 10, 0, dbid)
from gp_segment_configuration where role = 'p' and status = 'u' and content = 0;
NOTICE: Success:
gp_inject_fault
-----------------
t
(1 row)
begin;
truncate table dtm_retry_table;
-- Expected behavior: QD's 11th attempt to broadcast
-- abort_prepared message succeeds.
-- The transaction itself still fails with the injected
-- 'dtm_broadcast_prepare' error, so the truncate is rolled back.
end;
WARNING: the distributed transaction 'Abort Prepared' broadcast failed to one or more segments for gid DUMMY
NOTICE: Releasing segworker groups to retry broadcast.
NOTICE: Releasing segworker groups to retry broadcast.
NOTICE: Releasing segworker groups to retry broadcast.
NOTICE: Releasing segworker groups to retry broadcast.
NOTICE: Releasing segworker groups to retry broadcast.
NOTICE: Releasing segworker groups to retry broadcast.
NOTICE: Releasing segworker groups to retry broadcast.
NOTICE: Releasing segworker groups to retry broadcast.
NOTICE: Releasing segworker groups to retry broadcast.
ERROR: fault triggered, fault name:'dtm_broadcast_prepare' fault type:'error'
-- Verify that the table wasn't truncated due to previous transaction
-- being aborted.
select count(*) = 12 from dtm_retry_table;
?column?
----------
t
(1 row)
-- Reset all faults.
select gp_inject_fault_infinite('all', 'reset', dbid) from gp_segment_configuration;
NOTICE: Success:
gp_inject_fault_infinite
--------------------------
t
t
t
t
t
t
t
t
(8 rows)
......@@ -57,6 +57,10 @@ test: query_finish_pending
test: gpdiffcheck gptokencheck gp_hashagg sequence_gp tidscan co_nestloop_idxscan dml_in_udf gpdtm_plpgsql
# The test must be run by itself as it injects a fault on QE to fail
# at the 2nd phase of 2PC.
test: dtm_retry
test: rangefuncs_cdb gp_aggregates gp_dqa subselect_gp subselect_gp2 gp_transactions distributed_transactions olap_group olap_window_seq sirv_functions appendonly create_table_distpol alter_distpol_dropped query_finish
# 'partition' runs for a long time, so try to keep it together with other
......
-- Check if retry logic handles errors correctly. The retry logic had
-- a bug where error state wasn't cleaned up correctly during retries,
-- leading to PANIC due to ERRORDATA_STACK_SIZE exceeded.
set dtx_phase2_retry_count = 11;
-- Set a fault on one of the QEs such that an error is raised exactly
-- 10 times at the beginning of 2nd phase of 2PC.
-- ERRORDATA_STACK_SIZE is defined as 10. By erroring out 10 times,
-- the error handling logic in QD gets invoked 10 times. If error
-- state is not cleaned up before each retry, it suffices to return
-- an error 10 times to hit the above mentioned PANIC.
--
-- COMMIT_PREPARED
--
create extension if not exists gp_inject_fault;
-- NOTE(review): the 1 and 10 arguments presumably bound the fault to its
-- 1st through 10th occurrence; confirm against gp_inject_fault's signature.
select gp_inject_fault('finish_prepared_start_of_function', 'error', '', '', '', 1, 10, 0, dbid)
from gp_segment_configuration where role = 'p' and status = 'u' and content = 1;
begin;
create table dtm_retry_table (a int) distributed by (a);
-- Expected behavior is QD makes 11 attempts to commit and the last
-- one succeeds.
end;
-- Reset all faults.
select gp_inject_fault_infinite('all', 'reset', dbid) from gp_segment_configuration;
-- Verify that the table got created properly on all segments.
insert into dtm_retry_table select * from generate_series(1,12);
--
-- ABORT_SOME_PREPARED
--
-- Let content 0 primary error out during prepare. This leads to
-- abort_some_prepared broadcast in 2nd phase.
select gp_inject_fault('start_prepare', 'error', dbid)
from gp_segment_configuration where role = 'p' and status = 'u' and content = 0;
-- Let content 1 primary, which had successfully prepared the
-- transaction, report error 10 times upon receiving
-- abort_some_prepared request.
select gp_inject_fault('finish_prepared_start_of_function', 'error', '', '', '', 1, 10, 0, dbid)
from gp_segment_configuration where role = 'p' and status = 'u' and content = 1;
begin;
truncate table dtm_retry_table;
-- Expected behavior: QD's 11th attempt to broadcast
-- abort_some_prepared message succeeds.
-- The transaction itself still fails with the injected 'start_prepare'
-- error, so the truncate is rolled back.
end;
-- Verify that the table wasn't truncated due to previous transaction
-- being aborted.
select count(*) = 12 from dtm_retry_table;
-- Reset all faults.
select gp_inject_fault_infinite('all', 'reset', dbid) from gp_segment_configuration;
--
-- ABORT_PREPARED
--
-- After successful prepare, QD should error out, leading to
-- abort_prepared broadcast in 2nd phase.
select gp_inject_fault('dtm_broadcast_prepare', 'error', dbid)
from gp_segment_configuration where role = 'p' and content = -1;
-- Let content 0 primary error out 10 times during second phase.
select gp_inject_fault('finish_prepared_start_of_function', 'error', '', '', '', 1, 10, 0, dbid)
from gp_segment_configuration where role = 'p' and status = 'u' and content = 0;
begin;
truncate table dtm_retry_table;
-- Expected behavior: QD's 11th attempt to broadcast
-- abort_prepared message succeeds.
-- The transaction itself still fails with the injected
-- 'dtm_broadcast_prepare' error, so the truncate is rolled back.
end;
-- Verify that the table wasn't truncated due to previous transaction
-- being aborted.
select count(*) = 12 from dtm_retry_table;
-- Reset all faults.
select gp_inject_fault_infinite('all', 'reset', dbid) from gp_segment_configuration;
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册