diff --git a/src/backend/cdb/cdbdtxrecovery.c b/src/backend/cdb/cdbdtxrecovery.c index 243b76440046861f1fbd651e74812b05fdc0dbdb..a14b8135ebc4eb1771ce4200731237ea2b9f631b 100644 --- a/src/backend/cdb/cdbdtxrecovery.c +++ b/src/backend/cdb/cdbdtxrecovery.c @@ -565,8 +565,10 @@ redoDistributedCommitRecord(TMGXACT_LOG *gxact_log) ereport(FATAL, (errmsg("the limit of %d distributed transactions has been reached", max_tm_gxacts), - errdetail("The global user configuration (GUC) server " - "parameter max_prepared_transactions controls this limit."))); + errdetail("It should not happen. Temporarily increase " + "max_connections (need postmaster reboot) on " + "the postgres (master or standby) to work " + "around this issue and then report a bug"))); shmCommittedGxactArray[(*shmNumCommittedGxacts)++] = *gxact_log; elog((Debug_print_full_dtm ? LOG : DEBUG5), diff --git a/src/backend/cdb/cdbtm.c b/src/backend/cdb/cdbtm.c index a7566a535d1cf21a22a5e6ac683ba53570fbf64a..2b0eaa503c2077bd0abe0fdd97ce0d1335517630 100644 --- a/src/backend/cdb/cdbtm.c +++ b/src/backend/cdb/cdbtm.c @@ -610,6 +610,8 @@ doNotifyingCommitPrepared(void) (errmsg("the distributed transaction 'Commit Prepared' broadcast succeeded to all the segments"), TM_ERRDETAIL)); + SIMPLE_FAULT_INJECTOR("dtm_before_insert_forget_comitted"); + doInsertForgetCommitted(); /* @@ -999,13 +1001,36 @@ tmShmemInit(void) bool found; TmControlBlock *shared; + if (Gp_role == GP_ROLE_DISPATCH && max_prepared_xacts < MaxConnections) + elog(WARNING, "Better set max_prepared_transactions greater than max_connections"); + /* - * max_prepared_xacts is a guc which is postmaster-startup setable -- it - * can only be updated by restarting the system. Global transactions will - * all use two-phase commit, so the number of global transactions is bound - * to the number of prepared. + * max_prepared_transactions is a GUC which is postmaster-startup settable + * -- it can only be updated by restarting the system. 
Global transactions + * will all use two-phase commit, so the number of global transactions is + * bound to the number of prepared transactions. + * + * Note that on master, it is possible that some prepared xacts use only a + * partial gang, so on the QD the total prepared xacts might be quite large, + * but it is limited by max_connections since one QD should only have one + * 2PC at a time. So if we set max_tm_gxacts to max_prepared_transactions as + * before, shmCommittedGxactArray might not be able to accommodate committed + * but not forgotten transactions (standby recovery will fail if encountering + * this issue) if max_prepared_transactions is smaller than max_connections + * (though this is not suggested). Not to mention that + * max_prepared_transactions might be inconsistent between master/standby + * and segments (though this is not suggested). + * + * We can assign MaxBackends (MaxConnections should be fine also but let's + * be conservative) to max_tm_gxacts on master/standby to tolerate various + * configuration combinations of max_prepared_transactions and + * max_connections. For segments or utility mode, max_tm_gxacts is useless + * so let's set it to zero to save memory. */ - max_tm_gxacts = max_prepared_xacts; + if (Gp_role == GP_ROLE_DISPATCH) + max_tm_gxacts = MaxBackends; + else + max_tm_gxacts = 0; shared = (TmControlBlock *) ShmemInitStruct("Transaction manager", tmShmemSize(), &found); if (!shared) diff --git a/src/test/isolation2/expected/prepare_limit.out b/src/test/isolation2/expected/prepare_limit.out new file mode 100644 index 0000000000000000000000000000000000000000..0c36dfc3a5ed76f09a67782c1b2cb3018f8f9c95 --- /dev/null +++ b/src/test/isolation2/expected/prepare_limit.out @@ -0,0 +1,120 @@ +-- test to verify a bug that causes standby startup fatal with message like +-- "the limit of xxx distributed transactions has been reached". +-- Refer comment in https://github.com/greenplum-db/gpdb/issues/9207 for the +-- context. 
+include: helpers/server_helpers.sql; +CREATE + +-- We will reset the value to 250 finally so sanity check the current value here. +6: show max_prepared_transactions; + max_prepared_transactions +--------------------------- + 250 +(1 row) +!\retcode gpconfig -c max_prepared_transactions -v 3 --skipvalidation; +(exited with code 0) +!\retcode gpstop -ari; +(exited with code 0) + +5: create table prepare_limit1 (a int); +CREATE +5: create table prepare_limit2 (a int); +CREATE +5: create table prepare_limit3 (a int); +CREATE +5: create table prepare_limit4 (a int); +CREATE + +5: select gp_inject_fault_infinite('dtm_before_insert_forget_comitted', 'suspend', 1); + gp_inject_fault_infinite +-------------------------- + Success: +(1 row) + +-- Note first insert after table create triggers auto_stats and leads to 2pc +-- transaction. + +-- (2) is on seg0 +1&: insert into prepare_limit1 values(2); +2&: insert into prepare_limit2 values(2); + +-- (1) is on seg1 +3&: insert into prepare_limit3 values(1); +4&: insert into prepare_limit4 values(1); + +-- wait until these 2pc reach before inserting forget commit. +5: SELECT gp_wait_until_triggered_fault('dtm_before_insert_forget_comitted', 4, 1); + gp_wait_until_triggered_fault +------------------------------- + Success: +(1 row) + +-- wait until standby catches up and replays all xlogs. +5: select wait_for_replication_replay (-1, 5000); + wait_for_replication_replay +----------------------------- + t +(1 row) + +-- reset to make testing continue +5: select gp_inject_fault('dtm_before_insert_forget_comitted', 'reset', 1); + gp_inject_fault +----------------- + Success: +(1 row) +1<: <... completed> +INSERT 1 +2<: <... completed> +INSERT 1 +3<: <... completed> +INSERT 1 +4<: <... completed> +INSERT 1 + +-- verify that standby is correctly wal streaming. +5: select state from pg_stat_replication; + state +----------- + streaming +(1 row) + +-- verify the tuples are on correct segments so the test assumption is +-- correct. (i.e. 
tuple 2, 1 are on different segments). +5: select gp_segment_id, * from prepare_limit1; + gp_segment_id | a +---------------+--- + 0 | 2 +(1 row) +5: select gp_segment_id, * from prepare_limit2; + gp_segment_id | a +---------------+--- + 0 | 2 +(1 row) +5: select gp_segment_id, * from prepare_limit3; + gp_segment_id | a +---------------+--- + 1 | 1 +(1 row) +5: select gp_segment_id, * from prepare_limit4; + gp_segment_id | a +---------------+--- + 1 | 1 +(1 row) + +-- cleanup +5: drop table prepare_limit1; +DROP +5: drop table prepare_limit2; +DROP +5: drop table prepare_limit3; +DROP +5: drop table prepare_limit4; +DROP + +-- Not using gpconfig -r, else it makes max_prepared_transactions be default +-- (50) and some isolation2 tests will fail due to "too many clients". Hardcode +-- to 250 which is the default value when demo cluster is created. +!\retcode gpconfig -c max_prepared_transactions -v 250 --skipvalidation; +(exited with code 0) +!\retcode gpstop -ari; +(exited with code 0) diff --git a/src/test/isolation2/isolation2_schedule b/src/test/isolation2/isolation2_schedule index 5040348eed0a3b6b72630a36440be65f77c5b960..12f47ad4b73e2c735cd77a44aaa7da835169a038 100644 --- a/src/test/isolation2/isolation2_schedule +++ b/src/test/isolation2/isolation2_schedule @@ -1,6 +1,9 @@ test: setup test: lockmodes +# Put test prepare_limit near to test lockmodes since both of them reboot the +# cluster during testing. Usually the 2nd reboot should be faster. +test: prepare_limit test: ao_partition_lock test: dml_on_root_locks_all_parts diff --git a/src/test/isolation2/sql/prepare_limit.sql b/src/test/isolation2/sql/prepare_limit.sql new file mode 100644 index 0000000000000000000000000000000000000000..b1d82d582504b4bb9ceb39f3df60076485ff447e --- /dev/null +++ b/src/test/isolation2/sql/prepare_limit.sql @@ -0,0 +1,63 @@ +-- test to verify a bug that causes standby startup fatal with message like +-- "the limit of xxx distributed transactions has been reached". 
+-- Refer comment in https://github.com/greenplum-db/gpdb/issues/9207 for the +-- context. +include: helpers/server_helpers.sql; + +-- We will reset the value to 250 finally so sanity check the current value here. +6: show max_prepared_transactions; +!\retcode gpconfig -c max_prepared_transactions -v 3 --skipvalidation; +!\retcode gpstop -ari; + +5: create table prepare_limit1 (a int); +5: create table prepare_limit2 (a int); +5: create table prepare_limit3 (a int); +5: create table prepare_limit4 (a int); + +5: select gp_inject_fault_infinite('dtm_before_insert_forget_comitted', 'suspend', 1); + +-- Note first insert after table create triggers auto_stats and leads to 2pc +-- transaction. + +-- (2) is on seg0 +1&: insert into prepare_limit1 values(2); +2&: insert into prepare_limit2 values(2); + +-- (1) is on seg1 +3&: insert into prepare_limit3 values(1); +4&: insert into prepare_limit4 values(1); + +-- wait until these 2pc reach before inserting forget commit. +5: SELECT gp_wait_until_triggered_fault('dtm_before_insert_forget_comitted', 4, 1); + +-- wait until standby catches up and replays all xlogs. +5: select wait_for_replication_replay (-1, 5000); + +-- reset to make testing continue +5: select gp_inject_fault('dtm_before_insert_forget_comitted', 'reset', 1); +1<: +2<: +3<: +4<: + +-- verify that standby is correctly wal streaming. +5: select state from pg_stat_replication; + +-- verify the tuples are on correct segments so the test assumption is +-- correct. (i.e. tuple 2, 1 are on different segments). 
+5: select gp_segment_id, * from prepare_limit1; +5: select gp_segment_id, * from prepare_limit2; +5: select gp_segment_id, * from prepare_limit3; +5: select gp_segment_id, * from prepare_limit4; + +-- cleanup +5: drop table prepare_limit1; +5: drop table prepare_limit2; +5: drop table prepare_limit3; +5: drop table prepare_limit4; + +-- Not using gpconfig -r, else it makes max_prepared_transactions be default +-- (50) and some isolation2 tests will fail due to "too many clients". Hardcode +-- to 250 which is the default value when demo cluster is created. +!\retcode gpconfig -c max_prepared_transactions -v 250 --skipvalidation; +!\retcode gpstop -ari;