From 2fbdc825ab2e1fee4b77bc420e77f16b631284bf Mon Sep 17 00:00:00 2001 From: Ben Christel Date: Wed, 21 Mar 2018 14:27:42 -0700 Subject: [PATCH] Refactor idle sessions gang killing to new file The functionality for gp_vmem_idle_resource_timeout is implemented by these files. Refactor to make it TDDable and easier to understand. We moved DisconnectAndDestroyUnusedGangs to cdbgang because it will be a natural seam for our mocks when we write more unit tests that expect it to be called. Also, it seems to belong to that file given all of the functions it calls are already in cdbgang. Co-authored-by: Ben Christel Co-authored-by: Amil Khanzada --- src/backend/cdb/dispatcher/README.md | 8 +- src/backend/cdb/dispatcher/cdbgang.c | 46 +++++++++ src/backend/storage/lmgr/proc.c | 67 +------------ src/backend/tcop/Makefile | 3 + src/backend/tcop/idle_resource_cleaner.c | 97 +++++++++++++++++++ src/backend/tcop/postgres.c | 29 +----- src/backend/tcop/test/Makefile | 6 +- .../tcop/test/idle_resource_cleaner_test.c | 48 +++++++++ src/backend/utils/misc/guc_gp.c | 1 + src/include/cdb/cdbgang.h | 1 + src/include/storage/proc.h | 1 - src/include/tcop/idle_resource_cleaner.h | 25 +++++ 12 files changed, 239 insertions(+), 93 deletions(-) create mode 100644 src/backend/tcop/idle_resource_cleaner.c create mode 100644 src/backend/tcop/test/idle_resource_cleaner_test.c create mode 100644 src/include/tcop/idle_resource_cleaner.h diff --git a/src/backend/cdb/dispatcher/README.md b/src/backend/cdb/dispatcher/README.md index 43ad4116e6..30a4edcd60 100644 --- a/src/backend/cdb/dispatcher/README.md +++ b/src/backend/cdb/dispatcher/README.md @@ -1,6 +1,10 @@ ## Dispatcher API -This document illustrates the interfaces of a GPDB component called dispatcher, which is responsible for 1) building connections from master node to segments, -2) managing query/plan dispatching, 3) and collecting query execution results. The implementation of dispatcher is mainly located under this directory +This document illustrates the interfaces of a GPDB component called dispatcher, which is responsible for +1) building connections from master node to segments, +2) managing query/plan dispatching, and +3) collecting query execution results. + +The implementation of dispatcher is mainly located under this directory. ### Terms Used: * Gang: Gang refers to a group of processes on segments. There are 4 types of Gang: diff --git a/src/backend/cdb/dispatcher/cdbgang.c b/src/backend/cdb/dispatcher/cdbgang.c index 1287e2dd27..73af4ba17f 100644 --- a/src/backend/cdb/dispatcher/cdbgang.c +++ b/src/backend/cdb/dispatcher/cdbgang.c @@ -1266,6 +1266,52 @@ disconnectAndDestroyIdleReaderGangs(void) return; } +/* + * Destroy all idle (i.e available) reader gangs. + * It is always safe to get rid of the reader gangs. + * + * If we are not in a transaction and we do not have a TempNamespace, destroy + * writer gangs as well. + * + * call only from an idle session. + */ +void DisconnectAndDestroyUnusedGangs(void) +{ + /* + * Free gangs to free up resources on the segDBs. + */ + if (GangsExist()) + { + if (IsTransactionOrTransactionBlock() || TempNamespaceOidIsValid()) + { + /* + * If we are in a transaction, we can't release the writer gang, + * as this will abort the transaction. + * + * If we have a TempNameSpace, we can't release the writer gang, as this + * would drop any temp tables we own. + * + * Since we are idle, any reader gangs will be available but not allocated. + */ + disconnectAndDestroyIdleReaderGangs(); + } + else + { + /* + * Get rid of ALL gangs... Readers and primary writer. + * After this, we have no resources being consumed on the segDBs at all. + * + * Our session wasn't destroyed due to an fatal error or FTS action, so + * we don't need to do anything special. Specifically, we DON'T want + * to act like we are now in a new session, since that would be confusing + * in the log. + * + */ + DisconnectAndDestroyAllGangs(false); + } + } +} + /* * Cleanup a Gang, make it recyclable. * diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index e5b9936d34..37f86533dc 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -62,16 +62,15 @@ #include "utils/sharedsnapshot.h" /*SharedLocalSnapshotSlot*/ #include "cdb/cdblocaldistribxact.h" -#include "cdb/cdbgang.h" #include "cdb/cdbtm.h" #include "cdb/cdbvars.h" /*Gp_is_writer*/ #include "port/atomics.h" #include "utils/session_state.h" +#include "tcop/idle_resource_cleaner.h" /* GUC variables */ int DeadlockTimeout = 1000; int StatementTimeout = 0; -int IdleSessionGangTimeout = 18000; bool log_lock_waits = false; /* Pointer to this process's PGPROC struct, if any */ @@ -1771,68 +1770,6 @@ disable_sig_alarm(bool is_statement_timeout) return true; } -/* - * We get here when a session has been idle for a while (waiting for the - * client to send us SQL to execute). The idea is to consume less resources while sitting idle, - * so we can support more sessions being logged on. - * - * The expectation is that if the session is logged on, but nobody is sending us work to do, - * we want to free up whatever resources we can. Usually it means there is a human being at the - * other end of the connection, and that person has walked away from their terminal, or just hasn't - * decided what to do next. We could be idle for a very long time (many hours). - * - * Of course, freeing gangs means that the next time the user does send in an SQL statement, - * we need to allocate gangs (at least the writer gang) to do anything. This entails extra work, - * so we don't want to do this if we don't think the session has gone idle. - * - * P.s: Is there anything we can free up on the master (QD) side? I can't think of anything. - * - */ -static void -HandleClientWaitTimeout(void) -{ - elog(DEBUG2,"HandleClientWaitTimeout"); - - /* - * cancel the timer, as there is no reason we need it to go off again. - */ - disable_sig_alarm(false); - - /* - * Free gangs to free up resources on the segDBs. - */ - if (GangsExist()) - { - if (IsTransactionOrTransactionBlock() || TempNamespaceOidIsValid()) - { - /* - * If we are in a transaction, we can't release the writer gang, - * as this will abort the transaction. - * - * If we have a TempNameSpace, we can't release the writer gang, as this - * would drop any temp tables we own. - * - * Since we are idle, any reader gangs will be available but not allocated. - */ - disconnectAndDestroyIdleReaderGangs(); - } - else - { - /* - * Get rid of ALL gangs... Readers and primary writer. - * After this, we have no resources being consumed on the segDBs at all. - * - * Our session wasn't destroyed due to an fatal error or FTS action, so - * we don't need to do anything special. Specifically, we DON'T want - * to act like we are now in a new session, since that would be confusing - * in the log. - * - */ - DisconnectAndDestroyAllGangs(false); - } - } -} - /* * Check for statement timeout. If the timeout time has come, * trigger a query-cancel interrupt; if not, reschedule the SIGALRM @@ -2043,7 +1980,7 @@ ProcessClientWaitTimeout(void) clientWaitTimeoutInterruptOccurred = 0; - HandleClientWaitTimeout(); + DoIdleResourceCleanup(); if (notify_enabled) EnableNotifyInterrupt(); diff --git a/src/backend/tcop/Makefile b/src/backend/tcop/Makefile index 0b8f3a6e03..b53d4d98d3 100644 --- a/src/backend/tcop/Makefile +++ b/src/backend/tcop/Makefile @@ -14,6 +14,9 @@ include $(top_builddir)/src/Makefile.global OBJS= dest.o fastpath.o postgres.o pquery.o utility.o +# Greenplum objects +OBJS+= idle_resource_cleaner.o + ifneq (,$(filter $(PORTNAME),cygwin win32)) override CPPFLAGS += -DWIN32_STACK_RLIMIT=$(WIN32_STACK_RLIMIT) endif diff --git a/src/backend/tcop/idle_resource_cleaner.c b/src/backend/tcop/idle_resource_cleaner.c new file mode 100644 index 0000000000..54a83ad1fd --- /dev/null +++ b/src/backend/tcop/idle_resource_cleaner.c @@ -0,0 +1,97 @@ +/*------------------------------------------------------------------------- + * + * idle_resource_cleaner.c + * + * + * Portions Copyright (c) 2012-Present Pivotal Software, Inc. + * + * + * IDENTIFICATION + * src/backend/tcop/idle_resource_cleaner.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "cdb/cdbgang.h" +#include "storage/proc.h" +#include "tcop/idle_resource_cleaner.h" + +int IdleSessionGangTimeout = 18000; + +static int +get_idle_gang_timeout(void) +{ + return GangsExist() ? IdleSessionGangTimeout : 0; +} + +static void +idle_gang_timeout_action(void) +{ + DisconnectAndDestroyUnusedGangs(); +} + +/* + * We want to check to see if our session goes "idle" (nobody sending us work to + * do). We decide this is true if after waiting a while, we don't get a message + * from the client. + * We can then free resources (right now, just the gangs on the segDBs). + * + * A bit ugly: We share the sig alarm timer with the deadlock detection. + * We know which it is (deadlock detection needs to run or idle + * session resource release) based on the DoingCommandRead flag. + * + * Note we don't need to worry about the statement timeout timer + * because it can't be running when we are idle. + * + * We want the time value to be long enough so we don't free gangs prematurely. + * This means giving the end user enough time to type in the next SQL statement + * + * + * GPDB_93_MERGE_FIXME: replace the enable_sig_alarm() calls with the + * new functionality provided in timeout.c from PG 9.3. + */ +void StartIdleResourceCleanupTimers() +{ + int sigalarm_timeout = get_idle_gang_timeout(); + + if (sigalarm_timeout > 0) + { + if (!enable_sig_alarm(sigalarm_timeout, false)) + elog(FATAL, "could not set timer for client wait timeout"); + } +} + +/* + * We get here when a session has been idle for a while (waiting for the client + * to send us SQL to execute). The idea is to consume less resources while + * sitting idle, so we can support more sessions being logged on. + * + * The expectation is that if the session is logged on, but nobody is sending us + * work to do, we want to free up whatever resources we can. Usually it means + * there is a human being at the other end of the connection, and that person + * has walked away from their terminal, or just hasn't decided what to do next. + * We could be idle for a very long time (many hours). + * + * Of course, freeing gangs means that the next time the user does send in an + * SQL statement, we need to allocate gangs (at least the writer gang) to do + * anything. This entails extra work, so we don't want to do this if we don't + * think the session has gone idle. + * + * PS: Is there anything we can free up on the master (QD) side? I can't + * think of anything. + * + */ +void +DoIdleResourceCleanup(void) +{ + elog(DEBUG2,"DoIdleResourceCleanup"); + + /* + * Cancel the timer, as there is no reason we need it to go off again + * (setitimer repeats by default). + */ + disable_sig_alarm(false); + + idle_gang_timeout_action(); +} diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index 65e8422725..10bc08f9f6 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -101,6 +101,7 @@ #include "utils/session_state.h" #include "utils/vmem_tracker.h" +#include "tcop/idle_resource_cleaner.h" extern char *optarg; extern int optind; @@ -5132,37 +5133,17 @@ PostgresMain(int argc, char *argv[], /* * (2b) Check for temp table delete reset session work. + * Also clean up idle resources. */ if (Gp_role == GP_ROLE_DISPATCH) + { CheckForResetSession(); + StartIdleResourceCleanupTimers(); + } /* * (3) read a command (loop blocks here) */ - if (Gp_role == GP_ROLE_DISPATCH) - { - /* - * We want to check to see if our session goes "idle" (nobody sending us work to do) - * We decide this it true if after waiting a while, we don't get a message from the client. - * We can then free resources (right now, just the gangs on the segDBs). - * - * A Bit ugly: We share the sig alarm timer with the deadlock detection. - * We know which it is (deadlock detection needs to run or idle - * session resource release) based on the DoingCommandRead flag. - * - * Perhaps instead of calling enable_sig_alarm, we should just call - * setitimer() directly (we don't need to worry about the statement timeout timer - * because it can't be running when we are idle). - * - * We want the time value to be long enough so we don't free gangs prematurely. - * This means giving the end user enough time to type in the next SQL statement - * - */ - if (IdleSessionGangTimeout > 0 && GangsExist()) - if (!enable_sig_alarm( IdleSessionGangTimeout /* ms */, false)) - elog(FATAL, "could not set timer for client wait timeout"); - } - firstchar = ReadCommand(&input_message); /* diff --git a/src/backend/tcop/test/Makefile b/src/backend/tcop/test/Makefile index cd2d510ed5..a241cb7772 100644 --- a/src/backend/tcop/test/Makefile +++ b/src/backend/tcop/test/Makefile @@ -2,7 +2,7 @@ subdir=src/backend/tcop top_builddir=../../../.. include $(top_builddir)/src/Makefile.global -TARGETS=postgres +TARGETS=postgres idle_resource_cleaner include $(top_builddir)/src/backend/mock.mk @@ -10,3 +10,7 @@ postgres.t: \ $(MOCK_DIR)/backend/commands/async_mock.o \ $(MOCK_DIR)/backend/storage/ipc/sinval_mock.o \ $(MOCK_DIR)/backend/utils/error/elog_mock.o + +idle_resource_cleaner.t: \ + $(MOCK_DIR)/backend/cdb/dispatcher/cdbgang_mock.o \ + $(MOCK_DIR)/backend/storage/lmgr/proc_mock.o diff --git a/src/backend/tcop/test/idle_resource_cleaner_test.c b/src/backend/tcop/test/idle_resource_cleaner_test.c new file mode 100644 index 0000000000..1afd852da6 --- /dev/null +++ b/src/backend/tcop/test/idle_resource_cleaner_test.c @@ -0,0 +1,48 @@ +#include "../idle_resource_cleaner.c" + +#include +#include +#include +#include "cmockery.h" + +void +test__StartIdleResourceCleanupTimersWhenNoGangsExist( + void **state) +{ + IdleSessionGangTimeout = 10000; + + will_return(GangsExist, false); + /* cmockery implicitly asserts that enable_sig_alarm is not called */ + + StartIdleResourceCleanupTimers(); +} + +void +test__StartIdleResourceCleanupTimersWhenGangsExist( + void **state) +{ + IdleSessionGangTimeout = 10000; + + will_return(GangsExist, true); + /* cmockery implicitly asserts that enable_sig_alarm is not called */ + will_return(enable_sig_alarm, true); + + expect_value(enable_sig_alarm, delayms, 10000); + expect_value(enable_sig_alarm, is_statement_timeout, false); + + StartIdleResourceCleanupTimers(); +} + +int +main(int argc, char *argv[]) +{ + cmockery_parse_arguments(argc, argv); + + const UnitTest tests[] = { + unit_test(test__StartIdleResourceCleanupTimersWhenNoGangsExist), + unit_test(test__StartIdleResourceCleanupTimersWhenGangsExist), + + }; + + run_tests(tests); +} diff --git a/src/backend/utils/misc/guc_gp.c b/src/backend/utils/misc/guc_gp.c index f61a72e6d9..d322482f26 100644 --- a/src/backend/utils/misc/guc_gp.c +++ b/src/backend/utils/misc/guc_gp.c @@ -39,6 +39,7 @@ #include "replication/walsender.h" #include "storage/bfz.h" #include "storage/proc.h" +#include "tcop/idle_resource_cleaner.h" #include "utils/builtins.h" #include "utils/guc_tables.h" #include "utils/inval.h" diff --git a/src/include/cdb/cdbgang.h b/src/include/cdb/cdbgang.h index f93545443c..4fe39501ea 100644 --- a/src/include/cdb/cdbgang.h +++ b/src/include/cdb/cdbgang.h @@ -86,6 +86,7 @@ extern void freeGangsForPortal(char *portal_name); extern void DisconnectAndDestroyGang(Gang *gp); extern void DisconnectAndDestroyAllGangs(bool resetSession); +extern void DisconnectAndDestroyUnusedGangs(void); extern void CheckForResetSession(void); diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index 1b41ef3b7f..03de7ed815 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -238,7 +238,6 @@ typedef struct PROC_HDR extern int DeadlockTimeout; extern int StatementTimeout; extern bool log_lock_waits; -extern int IdleSessionGangTimeout; extern volatile bool cancel_from_timeout; diff --git a/src/include/tcop/idle_resource_cleaner.h b/src/include/tcop/idle_resource_cleaner.h new file mode 100644 index 0000000000..99c0103295 --- /dev/null +++ b/src/include/tcop/idle_resource_cleaner.h @@ -0,0 +1,25 @@ +/*------------------------------------------------------------------------- + * + * idle_resource_cleaner.h + * Functions used to clean up resources on idle sessions. + * + * + * Portions Copyright (c) 2012-Present Pivotal Software, Inc. + * + * + * IDENTIFICATION + * src/include/tcop/idle_resource_cleaner.h + * + *------------------------------------------------------------------------- + */ +#ifndef IDLE_RESOURCE_CLEANER_H +#define IDLE_RESOURCE_CLEANER_H + + +void StartIdleResourceCleanupTimers(void); + +void DoIdleResourceCleanup(void); + +extern int IdleSessionGangTimeout; + +#endif /* IDLE_RESOURCE_CLEANER_H */ -- GitLab