diff --git a/src/backend/cdb/dispatcher/cdbconn.c b/src/backend/cdb/dispatcher/cdbconn.c index 842f2e32b4c202db6b16f3d0f61a70ab6d328ce5..dfacca23c06ca6ffd7ba22f3c81f0644700ce960 100644 --- a/src/backend/cdb/dispatcher/cdbconn.c +++ b/src/backend/cdb/dispatcher/cdbconn.c @@ -359,7 +359,7 @@ void cdbconn_doConnect(SegmentDatabaseDescriptor *segdbDesc, if (!segdbDesc->errcode) segdbDesc->errcode = ERRCODE_GP_INTERCONNECTION_ERROR; - appendPQExpBuffer(&segdbDesc->error_message, "%s\n", PQerrorMessage(segdbDesc->conn)); + appendPQExpBuffer(&segdbDesc->error_message, "%s", PQerrorMessage(segdbDesc->conn)); /* Don't use elog, it's not thread-safe */ if (gp_log_gang >= GPVARS_VERBOSITY_DEBUG) diff --git a/src/backend/cdb/dispatcher/cdbdisp_query.c b/src/backend/cdb/dispatcher/cdbdisp_query.c index fb6d83a3fd81ec2c7ee660174661b0d8fcdcc411..05099210b31155fe63750322f5e8ad9ef2ab5eb6 100644 --- a/src/backend/cdb/dispatcher/cdbdisp_query.c +++ b/src/backend/cdb/dispatcher/cdbdisp_query.c @@ -22,6 +22,7 @@ #include "utils/guc.h" #include "utils/lsyscache.h" #include "utils/memutils.h" +#include "utils/faultinjector.h" #include "miscadmin.h" #include "cdb/cdbdisp.h" @@ -1286,6 +1287,8 @@ cdbdisp_dispatchX(DispatchCommandQueryParms *pQueryParms, ds->primaryResults->writer_gang = primaryGang; cdbdisp_dispatchToGang(ds, primaryGang, si, &direct); + + SIMPLE_FAULT_INJECTOR(AfterOneSliceDispatched); } pfree(sliceVector); diff --git a/src/backend/cdb/dispatcher/cdbgang_async.c b/src/backend/cdb/dispatcher/cdbgang_async.c index b1fe3d3c30883402f2622ddd602348ddf5cdbd7c..60e4580e38586945cb7d2a0cd2d47b45dec30ff5 100644 --- a/src/backend/cdb/dispatcher/cdbgang_async.c +++ b/src/backend/cdb/dispatcher/cdbgang_async.c @@ -27,7 +27,6 @@ #include "miscadmin.h" #include "utils/gp_atomic.h" -static int getTimeout(const struct timeval* startTS); static Gang *createGang_async(GangType type, int gang_id, int size, int content); CreateGangFunc pCreateGangFuncAsync = createGang_async; @@ -48,6 +47,7 @@ 
createGang_async(GangType type, int gang_id, int size, int content) int in_recovery_mode_count = 0; int successful_connections = 0; bool retry = false; + bool timeout = false; ELOG_DISPATCHER_DEBUG("createGang type = %d, gang_id = %d, size = %d, content = %d", type, gang_id, size, content); @@ -63,7 +63,7 @@ createGang_async(GangType type, int gang_id, int size, int content) /* Check writer gang firstly*/ if (type != GANGTYPE_PRIMARY_WRITER && !isPrimaryWriterGangAlive()) ereport(ERROR, (errcode(ERRCODE_GP_INTERCONNECTION_ERROR), - errmsg("failed to create gang on one or more segments"), + errmsg("failed to acquire resources on one or more segments"), errdetail("writer gang got broken before creating reader gangs"))); create_gang_retry: @@ -82,7 +82,6 @@ create_gang_retry: MemoryContextSwitchTo(newGangDefinition->perGangContext); struct pollfd *fds; - struct timeval startTS; PG_TRY(); { @@ -115,7 +114,7 @@ create_gang_retry: if(cdbconn_isBadConnection(segdbDesc)) ereport(ERROR, (errcode(ERRCODE_GP_INTERCONNECTION_ERROR), - errmsg("failed to create gang on one or more segments"), + errmsg("failed to acquire resources on one or more segments"), errdetail("%s (%s)", PQerrorMessage(segdbDesc->conn), segdbDesc->whoami))); } @@ -124,13 +123,11 @@ create_gang_retry: * timeout clock (= get the start timestamp), and poll until they're * all completed or we reach timeout. 
*/ - gettimeofday(&startTS, NULL); fds = (struct pollfd *) palloc0(sizeof(struct pollfd) * size); for(;;) { int nready; - int timeout; int nfds = 0; for (i = 0; i < size; i++) @@ -153,7 +150,7 @@ create_gang_retry: errmsg("failed to acquire resources on one or more segments"), errdetail("Internal error: No motion listener port (%s)", segdbDesc->whoami))); successful_connections++; - break; + continue; case PGRES_POLLING_READING: fds[nfds].fd = PQsocket(segdbDesc->conn); @@ -176,18 +173,22 @@ create_gang_retry: else { ereport(ERROR, (errcode(ERRCODE_GP_INTERCONNECTION_ERROR), - errmsg("failed to create gang on one or more segments"), + errmsg("failed to acquire resources on one or more segments"), errdetail("%s (%s)", PQerrorMessage(segdbDesc->conn), segdbDesc->whoami))); } break; default: - - ereport(ERROR, (errcode(ERRCODE_GP_INTERCONNECTION_ERROR), - errmsg("failed to create gang on one or more segments"), - errdetail("unknown pollStatus"))); + ereport(ERROR, (errcode(ERRCODE_GP_INTERCONNECTION_ERROR), + errmsg("failed to acquire resources on one or more segments"), + errdetail("unknown pollStatus (%s)", segdbDesc->whoami))); break; } + + if (timeout) + ereport(ERROR, (errcode(ERRCODE_GP_INTERCONNECTION_ERROR), + errmsg("failed to acquire resources on one or more segments"), + errdetail("timeout expired\n (%s)", segdbDesc->whoami))); } if (nfds == 0) @@ -195,10 +196,9 @@ create_gang_retry: CHECK_FOR_INTERRUPTS(); - timeout = getTimeout(&startTS); - /* Wait until something happens */ - nready = poll(fds, nfds, timeout); + nready = poll(fds, nfds, gp_segment_connect_timeout ? 
+ gp_segment_connect_timeout * 1000 : -1); if (nready < 0) { int sock_errno = SOCK_ERRNO; @@ -206,18 +206,11 @@ create_gang_retry: continue; ereport(ERROR, (errcode(ERRCODE_GP_INTERCONNECTION_ERROR), - errmsg("failed to create gang on one or more segments"), + errmsg("failed to acquire resources on one or more segments"), errdetail("poll() failed: errno = %d", sock_errno))); } else if (nready == 0) - { - if (timeout != 0) - continue; - - ereport(ERROR, (errcode(ERRCODE_GP_INTERCONNECTION_ERROR), - errmsg("failed to create gang on one or more segments"), - errdetail("createGang timeout after %d seconds", gp_segment_connect_timeout))); - } + timeout = true; } ELOG_DISPATCHER_DEBUG("createGang: %d processes requested; %d successful connections %d in recovery", @@ -234,14 +227,14 @@ create_gang_retry: if (isFTSEnabled() && FtsTestSegmentDBIsDown(newGangDefinition->db_descriptors, size)) ereport(ERROR, (errcode(ERRCODE_GP_INTERCONNECTION_ERROR), - errmsg("failed to create gang on one or more segments"), + errmsg("failed to acquire resources on one or more segments"), errdetail("FTS detected one or more segments are down"))); if ( gp_gang_creation_retry_count <= 0 || create_gang_retry_counter++ >= gp_gang_creation_retry_count || type != GANGTYPE_PRIMARY_WRITER) ereport(ERROR, (errcode(ERRCODE_GP_INTERCONNECTION_ERROR), - errmsg("failed to create gang on one or more segments"), + errmsg("failed to acquire resources on one or more segments"), errdetail("segments is in recovery mode"))); ELOG_DISPATCHER_DEBUG("createGang: gang creation failed, but retryable."); @@ -280,26 +273,3 @@ create_gang_retry: return newGangDefinition; } -static int getTimeout(const struct timeval* startTS) -{ - struct timeval now; - int timeout; - int64 diff_us; - - gettimeofday(&now, NULL); - - if (gp_segment_connect_timeout > 0) - { - diff_us = (now.tv_sec - startTS->tv_sec) * 1000000; - diff_us += (int) now.tv_usec - (int) startTS->tv_usec; - if (diff_us > (int64) gp_segment_connect_timeout * 1000000) - 
timeout = 0; - else - timeout = gp_segment_connect_timeout * 1000 - diff_us / 1000; - } - else - timeout = -1; - - return timeout; -} - diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 348529ea9de60cb68c24a0260f3079345f2f8d92..132ad00a006e51c61dd2427216bf00c31eaa6f0f 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -2953,6 +2953,8 @@ retry1: break; } + SIMPLE_FAULT_INJECTOR(ProcessStartupPacketFault); + return STATUS_OK; } diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index ae4588270c30cb47c146313ba3fdf04452b1d4ed..55286ed4bfdb68624d3bce66c179192c04496513 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -3361,6 +3361,7 @@ drop_unnamed_stmt(void) void quickdie(SIGNAL_ARGS) { + SIMPLE_FAULT_INJECTOR(QuickDie); quickdie_impl(); } diff --git a/src/backend/utils/misc/faultinjector.c b/src/backend/utils/misc/faultinjector.c index 5a00f4481ec44162490afa9905e2196fd64c417d..707fc72798c7f28d0c31685d32e36b79ce920ec4 100644 --- a/src/backend/utils/misc/faultinjector.c +++ b/src/backend/utils/misc/faultinjector.c @@ -313,6 +313,12 @@ FaultInjectorIdentifierEnumToString[] = { /* inject fault while translating relcache entries */ _("send_qe_details_init_backend"), /* inject fault before sending QE details during backend initialization */ + _("process_startup_packet"), + /* inject fault in ProcessStartupPacket() */ + _("quickdie"), + /* inject fault in quickdie*/ + _("after_one_slice_dispatched"), + /* inject fault in cdbdisp_dispatchX*/ _("not recognized"), }; diff --git a/src/include/utils/faultinjector.h b/src/include/utils/faultinjector.h index 9f954cf7d11d2d769287bf5ac9cc6e88bd78b1ae..9f483ef6898145cc2aadddda0b1e7fc0ad36bd2c 100644 --- a/src/include/utils/faultinjector.h +++ b/src/include/utils/faultinjector.h @@ -209,6 +209,9 @@ typedef enum FaultInjectorIdentifier_e { OptRelcacheTranslatorCatalogAccess, SendQEDetailsInitBackend, + 
ProcessStartupPacketFault, + QuickDie, + AfterOneSliceDispatched, /* INSERT has to be done before that line */ FaultInjectorIdMax, diff --git a/src/test/regress/expected/dispatch.out b/src/test/regress/expected/dispatch.out deleted file mode 100644 index ccdfe5a571aef4081beb93dffdd3ed2345101884..0000000000000000000000000000000000000000 --- a/src/test/regress/expected/dispatch.out +++ /dev/null @@ -1,86 +0,0 @@ --- Misc tests related to dispatching queries to segments. --- Test quoting of GUC values and databse names when they're sent to segments --- There used to be a bug in the quoting when the search_path setting was sent --- to the segment. It was not easily visible when search_path was set with a --- SET command, only when the setting was sent as part of the startup packet. --- Set search_path as a per-user setting so that we can test that. -CREATE DATABASE "dispatch test db"; -ALTER DATABASE "dispatch test db" SET search_path="my schema",public; -NOTICE: schema "my schema" does not exist -\c "dispatch test db" -CREATE SCHEMA "my schema"; --- Create a table with the same name in both schemas, "my schema" and public. -CREATE TABLE "my table" (t text); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 't' as the Greenplum Database data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. -INSERT INTO "my table" VALUES ('myschema.mytable'); -CREATE TABLE public."my table" (t text); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 't' as the Greenplum Database data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
-INSERT INTO public."my table" VALUES ('public.mytable'); -SELECT t as unquoted FROM "my table"; - unquoted ------------------- - myschema.mytable -(1 row) - -SELECT t as myschema FROM "my schema"."my table"; - myschema ------------------- - myschema.mytable -(1 row) - -SELECT t as public FROM public."my table"; - public ----------------- - public.mytable -(1 row) - -DROP TABLE "my table"; -DROP TABLE public."my table"; --- Create another table with the same name. To make sure the DROP worked --- and dropped the correct table. -CREATE TABLE "my table" (id integer); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Greenplum Database data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. -DROP TABLE "my table"; --- Clean up -\c regression -DROP DATABASE "dispatch test db"; --- --- test QD will report failure if QE fails to send its motion_listener back --- during backend initialization --- --- start_ignore -\! gpfaultinjector -f send_qe_details_init_backend -y reset -s 2 -20160823:15:12:59:015496 gpfaultinjector:localhost:gpadmin-[INFO]:-Starting gpfaultinjector with args: -f send_qe_details_init_backend -y reset -s 2 -20160823:15:12:59:015496 gpfaultinjector:localhost:gpadmin-[INFO]:-local Greenplum Version: 'postgres (Greenplum Database) 4.3.99.00 build dev' -20160823:15:12:59:015496 gpfaultinjector:localhost:gpadmin-[INFO]:-Obtaining Segment details from master... 
-20160823:15:12:59:015496 gpfaultinjector:localhost:gpadmin-[INFO]:-Injecting fault on 1 segment(s) -20160823:15:12:59:015496 gpfaultinjector:localhost:gpadmin-[INFO]:-Injecting fault on localhost.localdomain:/home/gpadmin/workspace/data/single_debug/primary/gpseg0:content=0:dbid=2:mode=s:status=u -20160823:15:12:59:015496 gpfaultinjector:localhost:gpadmin-[INFO]:-DONE --- inject a 'skip' fault before QE sends its motion_listener -\! gpfaultinjector -f send_qe_details_init_backend -y skip -s 2 -o 0 -20160823:15:12:59:015508 gpfaultinjector:localhost:gpadmin-[INFO]:-Starting gpfaultinjector with args: -f send_qe_details_init_backend -y skip -s 2 -o 0 -20160823:15:12:59:015508 gpfaultinjector:localhost:gpadmin-[INFO]:-local Greenplum Version: 'postgres (Greenplum Database) 4.3.99.00 build dev' -20160823:15:12:59:015508 gpfaultinjector:localhost:gpadmin-[INFO]:-Obtaining Segment details from master... -20160823:15:12:59:015508 gpfaultinjector:localhost:gpadmin-[INFO]:-Injecting fault on 1 segment(s) -20160823:15:12:59:015508 gpfaultinjector:localhost:gpadmin-[INFO]:-Injecting fault on localhost.localdomain:/home/gpadmin/workspace/data/single_debug/primary/gpseg0:content=0:dbid=2:mode=s:status=u -20160823:15:12:59:015508 gpfaultinjector:localhost:gpadmin-[INFO]:-DONE --- end_ignore --- terminate exiting QEs first -\c --- verify failure will be reported -SELECT 1 FROM gp_dist_random('gp_id'); -ERROR: failed to acquire resources on one or more segments -DETAIL: Internal error: No motion listener port (seg0 10.152.10.117:25432) --- reset fault injector --- start_ignore -\! 
gpfaultinjector -f send_qe_details_init_backend -y reset -s 2 -20160823:15:12:59:015522 gpfaultinjector:localhost:gpadmin-[INFO]:-Starting gpfaultinjector with args: -f send_qe_details_init_backend -y reset -s 2 -20160823:15:12:59:015522 gpfaultinjector:localhost:gpadmin-[INFO]:-local Greenplum Version: 'postgres (Greenplum Database) 4.3.99.00 build dev' -20160823:15:12:59:015522 gpfaultinjector:localhost:gpadmin-[INFO]:-Obtaining Segment details from master... -20160823:15:12:59:015522 gpfaultinjector:localhost:gpadmin-[INFO]:-Injecting fault on 1 segment(s) -20160823:15:12:59:015522 gpfaultinjector:localhost:gpadmin-[INFO]:-Injecting fault on localhost.localdomain:/home/gpadmin/workspace/data/single_debug/primary/gpseg0:content=0:dbid=2:mode=s:status=u -20160823:15:12:59:015522 gpfaultinjector:localhost:gpadmin-[INFO]:-DONE --- end_ignore diff --git a/src/test/regress/greenplum_schedule b/src/test/regress/greenplum_schedule index 3c2263e5956a6dbaeefa47eb52f04773deae1bfc..51a83ce44b505aed749871b3fc40aef533b8f3d8 100755 --- a/src/test/regress/greenplum_schedule +++ b/src/test/regress/greenplum_schedule @@ -25,7 +25,7 @@ ignore: leastsquares test: opr_sanity_gp decode_expr bitmapscan bitmapscan_ao case_gp limit_gp notin percentile naivebayes join_gp union_gp gpcopy gp_create_table test: filter gpctas gpdist matrix toast sublink table_functions olap_setup complex opclass_ddl bitmap_index information_schema test: indexjoin as_alias regex_gp gpparams with_clause transient_types gang_mgmt - +# dispatch should always run seperately from other cases. 
test: dispatch # 'segspace' relies on the segment spill space to be 0, and uses fault injectors diff --git a/src/test/regress/init_file b/src/test/regress/init_file index a31b74b871d4dc000d331e64e4bf4c1989e3bd4b..1d01ee7f9bb43b823f37b886fbb6be674b37c09f 100644 --- a/src/test/regress/init_file +++ b/src/test/regress/init_file @@ -118,6 +118,13 @@ s/Table "pg_temp_\d+.temp/Table "pg_temp_#####/ m/^LOG.*\"Feature/ s/^LOG.*\"Feature/\"Feature/ -m/^.*(seg\d.*:.*)/ -s/^.*(seg\d.*:.*)// +m/^DETAIL: Internal error: No motion listener port \(seg\d.*:.*\)/ +s/^DETAIL: Internal error: No motion listener port \(seg\d.*:.*\)// + +# Mask out gp_debug_linger HINT message for dispatch +m/^HINT: Process \d+ will wait for gp_debug_linger=\d+ seconds before termination\./ +s/^HINT: Process \d+ will wait for gp_debug_linger=\d+ seconds before termination\.// + +m/^ \(seg\d .*:\d+\)/ +s/^ \(seg\d .*:\d+\)// -- end_matchsubs diff --git a/src/test/regress/input/dispatch.source b/src/test/regress/input/dispatch.source new file mode 100644 index 0000000000000000000000000000000000000000..7378a40c244ff4156920f0867efd9a3ffc248497 --- /dev/null +++ b/src/test/regress/input/dispatch.source @@ -0,0 +1,237 @@ +-- Misc tests related to dispatching queries to segments. + +-- Test quoting of GUC values and databse names when they're sent to segments + +-- There used to be a bug in the quoting when the search_path setting was sent +-- to the segment. It was not easily visible when search_path was set with a +-- SET command, only when the setting was sent as part of the startup packet. +-- Set search_path as a per-user setting so that we can test that. +CREATE DATABASE "dispatch test db"; +ALTER DATABASE "dispatch test db" SET search_path="my schema",public; + +\c "dispatch test db" + +CREATE SCHEMA "my schema"; + +-- Create a table with the same name in both schemas, "my schema" and public. 
+CREATE TABLE "my table" (t text); +INSERT INTO "my table" VALUES ('myschema.mytable'); + +CREATE TABLE public."my table" (t text); +INSERT INTO public."my table" VALUES ('public.mytable'); + +SELECT t as unquoted FROM "my table"; +SELECT t as myschema FROM "my schema"."my table"; +SELECT t as public FROM public."my table"; + +DROP TABLE "my table"; +DROP TABLE public."my table"; + +-- Create another table with the same name. To make sure the DROP worked +-- and dropped the correct table. +CREATE TABLE "my table" (id integer); +DROP TABLE "my table"; + +-- Clean up +\c regression +DROP DATABASE "dispatch test db"; + +-- +-- test QD will report failure if QE fails to send its motion_listener back +-- during backend initialization +-- + +-- start_ignore +\! gpfaultinjector -q -f send_qe_details_init_backend -y reset -s 2 +-- inject a 'skip' fault before QE sends its motion_listener +\! gpfaultinjector -q -f send_qe_details_init_backend -y skip -s 2 -o 0 +-- end_ignore + +-- terminate exiting QEs first +\c +-- verify failure will be reported +SELECT 1 FROM gp_dist_random('gp_id'); + +-- reset fault injector +-- start_ignore +\! 
gpfaultinjector -q -f send_qe_details_init_backend -y reset -s 2 +-- end_ignore + +-- +-- Test suit : test gang creation and commands dispatching +-- +--start_ignore +drop table if exists dispatch_test; +drop table if exists dispatch_test_t1; +drop table if exists dispatch_test_t2; +drop table if exists dispatch_test_t3; +--end_ignore +create table dispatch_test as select i as c1 from generate_series(1, 10) i; +create table dispatch_test_t1 (c1 int, c2 int, c3 int); +create table dispatch_test_t2 (c1 int, c2 int, c3 int); +create table dispatch_test_t3 (c1 int, c2 int, c3 int); +insert into dispatch_test_t1 values (1,1,2); +insert into dispatch_test_t2 values (2,1,2); +insert into dispatch_test_t3 values (3,1,2); + +CREATE OR REPLACE FUNCTION cleanupAllGangs() RETURNS BOOL +AS '@abs_builddir@/regress@DLSUFFIX@', 'cleanupAllGangs' LANGUAGE C; + +-- check if segments has backend running +CREATE OR REPLACE FUNCTION numBackendsOnSegment() RETURNS INTEGER +AS '@abs_builddir@/regress@DLSUFFIX@', 'numBackendsOnSegment' LANGUAGE C; + +-- check if QD has reusable gangs +CREATE OR REPLACE FUNCTION hasGangsExist() RETURNS BOOL +AS '@abs_builddir@/regress@DLSUFFIX@', 'hasGangsExist' LANGUAGE C; + +CREATE VIEW v_hasBackendsOnSegment as select sum(numBackendsOnSegment()) > 0 from +gp_dist_random('gp_id'); + +-- disable debug linger to get immediate feedback from FATAL errors. +set gp_debug_linger to 0; + +-- test log debug related code within dispatch +set gp_log_gang to debug; +set log_min_messages to DEBUG; + +-- Case 1.1 +-- A segment in recovery mode, writer gang retry gp_gang_creation_retry_count times and finally success + +-- set maximun retry time to 60 seconds, this should be long enough for segment +-- recovery back. otherwise it should be bug somewhere +set gp_gang_creation_retry_count to 120; +set gp_gang_creation_retry_timer to 500; + +select cleanupAllGangs(); + +-- trigger fault and put segment 0 into recovery mode +\! 
gpfaultinjector -q -f process_startup_packet -y segv --seg_dbid 2 +--start_ignore +select 'trigger fault' from gp_dist_random('gp_id'); +--end_ignore + +-- should success after retry +select * from dispatch_test_t1, dispatch_test_t2, dispatch_test_t3 +where dispatch_test_t1.c2 = dispatch_test_t2.c2 and dispatch_test_t2.c3 = dispatch_test_t3.c3; + +\! gpfaultinjector -q -f process_startup_packet -y reset --seg_dbid 2 + +-- Case 1.2 +-- A segment in recovery mode for long time, writer gang retry gp_gang_creation_retry_count times and finally failed +-- set maximun retry time to 0.4s, so we can test if gp_gang_creation_retry_count +-- is actually work +set gp_gang_creation_retry_count to 2; +set gp_gang_creation_retry_timer to 200; +select cleanupAllGangs(); + +-- trigger fault and put segment 0 into recovery mode +\! gpfaultinjector -q -f process_startup_packet -y segv --seg_dbid 2 +\! gpfaultinjector -q -f quickdie -y suspend --seg_dbid 2 +--start_ignore +select 'trigger fault' from gp_dist_random('gp_id'); +--end_ignore + +-- should failed after 2 times +select * from dispatch_test_t1, dispatch_test_t2, dispatch_test_t3 +where dispatch_test_t1.c2 = dispatch_test_t2.c2 and dispatch_test_t2.c3 = dispatch_test_t3.c3; + +\! gpfaultinjector -q -f quickdie -y resume --seg_dbid 2 +\! gpfaultinjector -q -f process_startup_packet -y reset --seg_dbid 2 +\! gpfaultinjector -q -f quickdie -y reset --seg_dbid 2 + +--start_ignore +-- enlarge the retry count +set gp_gang_creation_retry_count to 128 ; +-- this will block until segment 0 recovery back, or report an error +-- after 24 seconds. 
+select 'wait recovery finish' from gp_dist_random('gp_id'); +--end_ignore + +-- cleanup all reusable gangs +select cleanupAllGangs(); + +-- expect no zombie backends left on segments +select * from v_hasBackendsOnSegment; + +-- should success +select * from dispatch_test_t1, dispatch_test_t2, dispatch_test_t3 +where dispatch_test_t1.c2 = dispatch_test_t2.c2 and dispatch_test_t2.c3 = dispatch_test_t3.c3; + +-- Case 1.3 +-- segment has non in-recovery-mode errors +-- when creating writer gang, an error reported and all gangs should be cleaned. +-- when creating reader gang, an error reported and only current gang is cleaned. +select cleanupAllGangs(); + +-- segment 0 report an error when get a request +\! gpfaultinjector -q -f process_startup_packet -y error --seg_dbid 2 + +-- expect failure +select * from dispatch_test_t1, dispatch_test_t2, dispatch_test_t3 +where dispatch_test_t1.c2 = dispatch_test_t2.c2 and dispatch_test_t2.c3 = dispatch_test_t3.c3; + +-- expect no resuable gang exist +select * from hasGangsExist(); +-- expect no zombie backends left on segments. +select * from v_hasBackendsOnSegment; + +-- cleanupAllGangs(); +select cleanupAllGangs(); + +\! gpfaultinjector -q -f process_startup_packet -y reset --seg_dbid 2 +-- segment 0 report an error when get the second request (reader gang creation request) +\! gpfaultinjector -q -f process_startup_packet -y error --seg_dbid 2 -o 3 + +-- expect failure +select * from dispatch_test_t1, dispatch_test_t2, dispatch_test_t3 +where dispatch_test_t1.c2 = dispatch_test_t2.c2 and dispatch_test_t2.c3 = dispatch_test_t3.c3; + +-- expect resuable gang exist +select * from hasGangsExist(); +-- expect QEs exist. +select * from v_hasBackendsOnSegment; + +\! gpfaultinjector -q -f process_startup_packet -y reset --seg_dbid 2 + +-- Case 1.4 +-- Test createGang timeout. 
+-- gp_segment_connect_timeout = 0 : wait forever +-- gp_segment_connect_timeout = 1 : wait 1 second +set gp_segment_connect_timeout to 1; +select cleanupAllGangs(); + +\! gpfaultinjector -q -f process_startup_packet -y suspend --seg_dbid 2 + +-- expect timeout failure +select * from dispatch_test_t1, dispatch_test_t2, dispatch_test_t3 +where dispatch_test_t1.c2 = dispatch_test_t2.c2 and dispatch_test_t2.c3 = dispatch_test_t3.c3; + +\! gpfaultinjector -q -f process_startup_packet -y resume --seg_dbid 2 +\! gpfaultinjector -q -f process_startup_packet -y reset --seg_dbid 2 + +set gp_segment_connect_timeout to 0; +select cleanupAllGangs(); + +\! gpfaultinjector -q -f process_startup_packet -y sleep --seg_dbid 2 -z 1 + +-- expect success +select * from dispatch_test_t1, dispatch_test_t2, dispatch_test_t3 +where dispatch_test_t1.c2 = dispatch_test_t2.c2 and dispatch_test_t2.c3 = dispatch_test_t3.c3; + +\! gpfaultinjector -q -f process_startup_packet -y reset --seg_dbid 2 + +-- Case 1.5 +-- query was cancelled when dispatching commands to one gang. +-- query should be cancelled as expected. + +-- must set log_min_messages to default when using interrupt, there is a bug in fault injection. +set log_min_messages to default; + +\! gpfaultinjector -q -f after_one_slice_dispatched -y interrupt --seg_dbid 1 + +-- should fail and report error +select * from dispatch_test_t1, dispatch_test_t2, dispatch_test_t3 +where dispatch_test_t1.c2 = dispatch_test_t2.c2 and dispatch_test_t2.c3 = dispatch_test_t3.c3; + +\! gpfaultinjector -q -f after_one_slice_dispatched -y reset --seg_dbid 1 diff --git a/src/test/regress/output/dispatch.source b/src/test/regress/output/dispatch.source new file mode 100644 index 0000000000000000000000000000000000000000..b75967541d035268e19044243351fb99116d52de --- /dev/null +++ b/src/test/regress/output/dispatch.source @@ -0,0 +1,324 @@ +-- Misc tests related to dispatching queries to segments. 
+-- Test quoting of GUC values and databse names when they're sent to segments +-- There used to be a bug in the quoting when the search_path setting was sent +-- to the segment. It was not easily visible when search_path was set with a +-- SET command, only when the setting was sent as part of the startup packet. +-- Set search_path as a per-user setting so that we can test that. +CREATE DATABASE "dispatch test db"; +ALTER DATABASE "dispatch test db" SET search_path="my schema",public; +NOTICE: schema "my schema" does not exist +\c "dispatch test db" +CREATE SCHEMA "my schema"; +-- Create a table with the same name in both schemas, "my schema" and public. +CREATE TABLE "my table" (t text); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 't' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +INSERT INTO "my table" VALUES ('myschema.mytable'); +CREATE TABLE public."my table" (t text); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 't' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +INSERT INTO public."my table" VALUES ('public.mytable'); +SELECT t as unquoted FROM "my table"; + unquoted +------------------ + myschema.mytable +(1 row) + +SELECT t as myschema FROM "my schema"."my table"; + myschema +------------------ + myschema.mytable +(1 row) + +SELECT t as public FROM public."my table"; + public +---------------- + public.mytable +(1 row) + +DROP TABLE "my table"; +DROP TABLE public."my table"; +-- Create another table with the same name. To make sure the DROP worked +-- and dropped the correct table. 
+CREATE TABLE "my table" (id integer); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +DROP TABLE "my table"; +-- Clean up +\c regression +DROP DATABASE "dispatch test db"; +-- +-- test QD will report failure if QE fails to send its motion_listener back +-- during backend initialization +-- +-- start_ignore +\! gpfaultinjector -q -f send_qe_details_init_backend -y reset -s 2 +-- inject a 'skip' fault before QE sends its motion_listener +\! gpfaultinjector -q -f send_qe_details_init_backend -y skip -s 2 -o 0 +-- end_ignore +-- terminate exiting QEs first +\c +-- verify failure will be reported +SELECT 1 FROM gp_dist_random('gp_id'); +ERROR: failed to acquire resources on one or more segments +DETAIL: Internal error: No motion listener port (seg0 127.0.0.1:40000) +-- reset fault injector +-- start_ignore +\! gpfaultinjector -q -f send_qe_details_init_backend -y reset -s 2 +-- end_ignore +-- +-- Test suit : test gang creation and commands dispatching +-- +--start_ignore +drop table if exists dispatch_test; +NOTICE: table "dispatch_test" does not exist, skipping +drop table if exists dispatch_test_t1; +NOTICE: table "dispatch_test_t1" does not exist, skipping +drop table if exists dispatch_test_t2; +NOTICE: table "dispatch_test_t2" does not exist, skipping +drop table if exists dispatch_test_t3; +NOTICE: table "dispatch_test_t3" does not exist, skipping +--end_ignore +create table dispatch_test as select i as c1 from generate_series(1, 10) i; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'c1' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. 
Make sure column(s) chosen are the optimal data distribution key to minimize skew. +create table dispatch_test_t1 (c1 int, c2 int, c3 int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +create table dispatch_test_t2 (c1 int, c2 int, c3 int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +create table dispatch_test_t3 (c1 int, c2 int, c3 int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +insert into dispatch_test_t1 values (1,1,2); +insert into dispatch_test_t2 values (2,1,2); +insert into dispatch_test_t3 values (3,1,2); +CREATE OR REPLACE FUNCTION cleanupAllGangs() RETURNS BOOL +AS '@abs_builddir@/regress@DLSUFFIX@', 'cleanupAllGangs' LANGUAGE C; +-- check if segments has backend running +CREATE OR REPLACE FUNCTION numBackendsOnSegment() RETURNS INTEGER +AS '@abs_builddir@/regress@DLSUFFIX@', 'numBackendsOnSegment' LANGUAGE C; +-- check if QD has reusable gangs +CREATE OR REPLACE FUNCTION hasGangsExist() RETURNS BOOL +AS '@abs_builddir@/regress@DLSUFFIX@', 'hasGangsExist' LANGUAGE C; +CREATE VIEW v_hasBackendsOnSegment as select sum(numBackendsOnSegment()) > 0 from +gp_dist_random('gp_id'); +-- disable debug linger to get immediate feedback from FATAL errors. 
+set gp_debug_linger to 0; +-- test log debug related code within dispatch +set gp_log_gang to debug; +set log_min_messages to DEBUG; +-- Case 1.1 +-- A segment in recovery mode, writer gang retry gp_gang_creation_retry_count times and finally success +-- set maximun retry time to 60 seconds, this should be long enough for segment +-- recovery back. otherwise it should be bug somewhere +set gp_gang_creation_retry_count to 120; +set gp_gang_creation_retry_timer to 500; +select cleanupAllGangs(); + cleanupallgangs +----------------- + t +(1 row) + +-- trigger fault and put segment 0 into recovery mode +\! gpfaultinjector -q -f process_startup_packet -y segv --seg_dbid 2 +--start_ignore +select 'trigger fault' from gp_dist_random('gp_id'); +ERROR: failed to acquire resources on one or more segments +DETAIL: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. + (seg0 127.0.0.1:40000) +--end_ignore +-- should success after retry +select * from dispatch_test_t1, dispatch_test_t2, dispatch_test_t3 +where dispatch_test_t1.c2 = dispatch_test_t2.c2 and dispatch_test_t2.c3 = dispatch_test_t3.c3; + c1 | c2 | c3 | c1 | c2 | c3 | c1 | c2 | c3 +----+----+----+----+----+----+----+----+---- + 1 | 1 | 2 | 2 | 1 | 2 | 3 | 1 | 2 +(1 row) + +\! gpfaultinjector -q -f process_startup_packet -y reset --seg_dbid 2 +-- Case 1.2 +-- A segment in recovery mode for long time, writer gang retry gp_gang_creation_retry_count times and finally failed +-- set maximun retry time to 0.4s, so we can test if gp_gang_creation_retry_count +-- is actually work +set gp_gang_creation_retry_count to 2; +set gp_gang_creation_retry_timer to 200; +select cleanupAllGangs(); + cleanupallgangs +----------------- + t +(1 row) + +-- trigger fault and put segment 0 into recovery mode +\! gpfaultinjector -q -f process_startup_packet -y segv --seg_dbid 2 +\! 
gpfaultinjector -q -f quickdie -y suspend --seg_dbid 2 +--start_ignore +select 'trigger fault' from gp_dist_random('gp_id'); +ERROR: failed to acquire resources on one or more segments +DETAIL: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. + (seg0 127.0.0.1:40000) +--end_ignore +-- should failed after 2 times +select * from dispatch_test_t1, dispatch_test_t2, dispatch_test_t3 +where dispatch_test_t1.c2 = dispatch_test_t2.c2 and dispatch_test_t2.c3 = dispatch_test_t3.c3; +ERROR: failed to acquire resources on one or more segments +DETAIL: segments is in recovery mode +\! gpfaultinjector -q -f quickdie -y resume --seg_dbid 2 +\! gpfaultinjector -q -f process_startup_packet -y reset --seg_dbid 2 +\! gpfaultinjector -q -f quickdie -y reset --seg_dbid 2 +--start_ignore +-- enlarge the retry count +set gp_gang_creation_retry_count to 128 ; +-- this will block until segment 0 recovery back, or report an error +-- after 24 seconds. +select 'wait recovery finish' from gp_dist_random('gp_id'); + ?column? +---------------------- + wait recovery finish + wait recovery finish + wait recovery finish +(3 rows) + +--end_ignore +-- cleanup all reusable gangs +select cleanupAllGangs(); + cleanupallgangs +----------------- + t +(1 row) + +-- expect no zombie backends left on segments +select * from v_hasBackendsOnSegment; + ?column? +---------- + f +(1 row) + +-- should success +select * from dispatch_test_t1, dispatch_test_t2, dispatch_test_t3 +where dispatch_test_t1.c2 = dispatch_test_t2.c2 and dispatch_test_t2.c3 = dispatch_test_t3.c3; + c1 | c2 | c3 | c1 | c2 | c3 | c1 | c2 | c3 +----+----+----+----+----+----+----+----+---- + 1 | 1 | 2 | 2 | 1 | 2 | 3 | 1 | 2 +(1 row) + +-- Case 1.3 +-- segment has non in-recovery-mode errors +-- when creating writer gang, an error reported and all gangs should be cleaned. +-- when creating reader gang, an error reported and only current gang is cleaned. 
+select cleanupAllGangs(); + cleanupallgangs +----------------- + t +(1 row) + +-- segment 0 report an error when get a request +\! gpfaultinjector -q -f process_startup_packet -y error --seg_dbid 2 +-- expect failure +select * from dispatch_test_t1, dispatch_test_t2, dispatch_test_t3 +where dispatch_test_t1.c2 = dispatch_test_t2.c2 and dispatch_test_t2.c3 = dispatch_test_t3.c3; +ERROR: failed to acquire resources on one or more segments +DETAIL: FATAL: fault triggered, fault name:'process_startup_packet' fault type:'error' (faultinjector.c:683) +HINT: Process 8632 will wait for gp_debug_linger=120 seconds before termination. +Note that its locks and other resources will not be released until then. + (seg0 127.0.0.1:40000) +-- expect no resuable gang exist +select * from hasGangsExist(); + hasgangsexist +--------------- + f +(1 row) + +-- expect no zombie backends left on segments. +select * from v_hasBackendsOnSegment; + ?column? +---------- + f +(1 row) + +-- cleanupAllGangs(); +select cleanupAllGangs(); + cleanupallgangs +----------------- + t +(1 row) + +\! gpfaultinjector -q -f process_startup_packet -y reset --seg_dbid 2 +-- segment 0 report an error when get the second request (reader gang creation request) +\! gpfaultinjector -q -f process_startup_packet -y error --seg_dbid 2 -o 3 +-- expect failure +select * from dispatch_test_t1, dispatch_test_t2, dispatch_test_t3 +where dispatch_test_t1.c2 = dispatch_test_t2.c2 and dispatch_test_t2.c3 = dispatch_test_t3.c3; +ERROR: failed to acquire resources on one or more segments +DETAIL: FATAL: fault triggered, fault name:'process_startup_packet' fault type:'error' (faultinjector.c:683) +HINT: Process 8685 will wait for gp_debug_linger=120 seconds before termination. +Note that its locks and other resources will not be released until then. + (seg0 127.0.0.1:40000) +-- expect resuable gang exist +select * from hasGangsExist(); + hasgangsexist +--------------- + t +(1 row) + +-- expect QEs exist. 
+select * from v_hasBackendsOnSegment; + ?column? +---------- + t +(1 row) + +\! gpfaultinjector -q -f process_startup_packet -y reset --seg_dbid 2 +-- Case 1.4 +-- Test createGang timeout. +-- gp_segment_connect_timeout = 0 : wait forever +-- gp_segment_connect_timeout = 1 : wait 1 second +set gp_segment_connect_timeout to 1; +select cleanupAllGangs(); + cleanupallgangs +----------------- + t +(1 row) + +\! gpfaultinjector -q -f process_startup_packet -y suspend --seg_dbid 2 +-- expect timeout failure +select * from dispatch_test_t1, dispatch_test_t2, dispatch_test_t3 +where dispatch_test_t1.c2 = dispatch_test_t2.c2 and dispatch_test_t2.c3 = dispatch_test_t3.c3; +ERROR: failed to acquire resources on one or more segments +DETAIL: timeout expired + (seg0 10.22.22.22:40000) +\! gpfaultinjector -q -f process_startup_packet -y resume --seg_dbid 2 +\! gpfaultinjector -q -f process_startup_packet -y reset --seg_dbid 2 +set gp_segment_connect_timeout to 0; +select cleanupAllGangs(); + cleanupallgangs +----------------- + t +(1 row) + +\! gpfaultinjector -q -f process_startup_packet -y sleep --seg_dbid 2 -z 1 +-- expect success +select * from dispatch_test_t1, dispatch_test_t2, dispatch_test_t3 +where dispatch_test_t1.c2 = dispatch_test_t2.c2 and dispatch_test_t2.c3 = dispatch_test_t3.c3; + c1 | c2 | c3 | c1 | c2 | c3 | c1 | c2 | c3 +----+----+----+----+----+----+----+----+---- + 1 | 1 | 2 | 2 | 1 | 2 | 3 | 1 | 2 +(1 row) + +\! gpfaultinjector -q -f process_startup_packet -y reset --seg_dbid 2 +-- Case 1.5 +-- query was cancelled when dispatching commands to one gang. +-- query should be cancelled as expected. +-- must set log_min_messages to default when using interrupt, there is a bug in fault injection. +set log_min_messages to default; +\! 
gpfaultinjector -q -f after_one_slice_dispatched -y interrupt --seg_dbid 1 +-- should fail and report error +select * from dispatch_test_t1, dispatch_test_t2, dispatch_test_t3 +where dispatch_test_t1.c2 = dispatch_test_t2.c2 and dispatch_test_t2.c3 = dispatch_test_t3.c3; +ERROR: canceling statement due to user request +\! gpfaultinjector -q -f after_one_slice_dispatched -y reset --seg_dbid 1 diff --git a/src/test/regress/regress.c b/src/test/regress/regress.c index cdbaf77837114ac768917024b68f98726c079915..3574cbcbacf81164908c550d01354d0d72c15788 100644 --- a/src/test/regress/regress.c +++ b/src/test/regress/regress.c @@ -8,12 +8,15 @@ #include #include +#include +#include "pgstat.h" #include "access/transam.h" #include "access/xact.h" #include "catalog/pg_language.h" #include "catalog/pg_type.h" #include "cdb/memquota.h" +#include "cdb/cdbgang.h" #include "commands/sequence.h" #include "commands/trigger.h" #include "executor/executor.h" @@ -74,6 +77,15 @@ extern Datum checkRelationAfterInvalidation(PG_FUNCTION_ARGS); /* Gang management test support */ extern Datum gangRaiseInfo(PG_FUNCTION_ARGS); +/* brutally cleanup all gangs */ +extern Datum cleanupAllGangs(PG_FUNCTION_ARGS); + +/* check if QD has gangs exist */ +extern Datum hasGangsExist(PG_FUNCTION_ARGS); + +/* get number of backends on segments except myself */ +extern Datum numBackendsOnSegment(PG_FUNCTION_ARGS); + /* * test_atomic_ops was backported from 9.5. 
This prototype doesn't appear * in the upstream version, because the PG_FUNCTION_INFO_V1() macro includes @@ -2433,6 +2445,40 @@ gangRaiseInfo(PG_FUNCTION_ARGS) PG_RETURN_BOOL(true); } +PG_FUNCTION_INFO_V1(cleanupAllGangs); +Datum +cleanupAllGangs(PG_FUNCTION_ARGS) +{ + disconnectAndDestroyAllGangs(false); + PG_RETURN_BOOL(true); +} + +PG_FUNCTION_INFO_V1(hasGangsExist); +Datum +hasGangsExist(PG_FUNCTION_ARGS) +{ + if (gangsExist()) + PG_RETURN_BOOL(true); + PG_RETURN_BOOL(false); +} + +PG_FUNCTION_INFO_V1(numBackendsOnSegment); +Datum +numBackendsOnSegment(PG_FUNCTION_ARGS) +{ + int beid; + int32 result = 0; + int pid = getpid(); + int tot_backends = pgstat_fetch_stat_numbackends(); + for (beid = 1; beid <= tot_backends; beid++) + { + PgBackendStatus *beentry = pgstat_fetch_stat_beentry(beid); + if (beentry && beentry->st_procpid >0 && beentry->st_procpid != pid) + result++; + } + + PG_RETURN_INT32(result); +} #ifndef PG_HAVE_ATOMIC_FLAG_SIMULATION static void diff --git a/src/test/regress/sql/dispatch.sql b/src/test/regress/sql/dispatch.sql deleted file mode 100644 index f99a1ff329bc70b6c157d3a3cdf4e2c339f72e19..0000000000000000000000000000000000000000 --- a/src/test/regress/sql/dispatch.sql +++ /dev/null @@ -1,58 +0,0 @@ --- Misc tests related to dispatching queries to segments. - --- Test quoting of GUC values and databse names when they're sent to segments - --- There used to be a bug in the quoting when the search_path setting was sent --- to the segment. It was not easily visible when search_path was set with a --- SET command, only when the setting was sent as part of the startup packet. --- Set search_path as a per-user setting so that we can test that. -CREATE DATABASE "dispatch test db"; -ALTER DATABASE "dispatch test db" SET search_path="my schema",public; - -\c "dispatch test db" - -CREATE SCHEMA "my schema"; - --- Create a table with the same name in both schemas, "my schema" and public. 
-CREATE TABLE "my table" (t text); -INSERT INTO "my table" VALUES ('myschema.mytable'); - -CREATE TABLE public."my table" (t text); -INSERT INTO public."my table" VALUES ('public.mytable'); - -SELECT t as unquoted FROM "my table"; -SELECT t as myschema FROM "my schema"."my table"; -SELECT t as public FROM public."my table"; - -DROP TABLE "my table"; -DROP TABLE public."my table"; - --- Create another table with the same name. To make sure the DROP worked --- and dropped the correct table. -CREATE TABLE "my table" (id integer); -DROP TABLE "my table"; - --- Clean up -\c regression -DROP DATABASE "dispatch test db"; - --- --- test QD will report failure if QE fails to send its motion_listener back --- during backend initialization --- - --- start_ignore -\! gpfaultinjector -f send_qe_details_init_backend -y reset -s 2 --- inject a 'skip' fault before QE sends its motion_listener -\! gpfaultinjector -f send_qe_details_init_backend -y skip -s 2 -o 0 --- end_ignore - --- terminate exiting QEs first -\c --- verify failure will be reported -SELECT 1 FROM gp_dist_random('gp_id'); - --- reset fault injector --- start_ignore -\! gpfaultinjector -f send_qe_details_init_backend -y reset -s 2 --- end_ignore