From 851013172450d1140f3db823a78058116a4e2baf Mon Sep 17 00:00:00 2001 From: David Kimura Date: Tue, 17 Apr 2018 16:18:15 -0700 Subject: [PATCH] Speed up dispatcher detection of segment state changes The dispatcher uses DISPATCH_WAIT_TIMEOUT_MSEC (current value is 2000) as its poll timeout. It waited for 30 iterations of poll to time out before checking the segment status, and then initiated an FTS probe before performing that check. As a result, it took about a minute for a query to fail in case of segment failures. This commit changes the dispatcher to check segment status on every poll timeout. It also uses the FTS version to decide whether the segments need to be checked at all. It avoids performing an FTS probe on timeout; instead, it relies on FTS being run at regular intervals and providing cached results. With this change, the test time for twophase_tolerance_with_mirror_promotion was cut down by ~2 minutes. Co-authored-by: Ashwin Agrawal --- src/backend/cdb/dispatcher/cdbdisp_async.c | 28 +++++++++++++--------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/src/backend/cdb/dispatcher/cdbdisp_async.c b/src/backend/cdb/dispatcher/cdbdisp_async.c index 953111f7e0..ae2d7e33d6 100644 --- a/src/backend/cdb/dispatcher/cdbdisp_async.c +++ b/src/backend/cdb/dispatcher/cdbdisp_async.c @@ -80,8 +80,6 @@ typedef struct CdbDispatchCmdAsync } CdbDispatchCmdAsync; -static int timeoutCounter = 0; - static void *cdbdisp_makeDispatchParams_async(int maxSlices, char *queryText, int len); static void cdbdisp_checkDispatchResult_async(struct CdbDispatcherState *ds, @@ -405,6 +403,7 @@ checkDispatchResult(CdbDispatcherState *ds, int timeout = 0; bool sentSignal = false; struct pollfd *fds; + uint8 ftsVersion = 0; db_count = pParms->dispatchCount; fds = (struct pollfd *) palloc(db_count * sizeof(struct pollfd)); @@ -515,6 +514,13 @@ checkDispatchResult(CdbDispatcherState *ds, elog(LOG, "handlePollError poll() failed; errno=%d", sock_errno); handlePollError(pParms); + + /* + * Since an error was detected for the segment, request + 
* FTS to perform a probe before checking the segment + * state. + */ + FtsNotifyProber(); checkSegmentAlive(pParms); if (pParms->waitMode != DISPATCH_WAIT_NONE) @@ -535,10 +541,17 @@ checkDispatchResult(CdbDispatcherState *ds, sentSignal = true; } - if (timeoutCounter++ > (wait ? 30 : 300)) + /* + * This code relies on FTS being triggered at regular + * intervals. Iff FTS detects change in configuration + * then check segment state. FTS probe is not triggered + * explicitly in this case because this happens every + * DISPATCH_WAIT_TIMEOUT_MSEC. + */ + if (ftsVersion == 0 || ftsVersion != getFtsVersion()) { + ftsVersion = getFtsVersion(); checkSegmentAlive(pParms); - timeoutCounter = 0; } if (!wait) @@ -771,7 +784,6 @@ static void checkSegmentAlive(CdbDispatchCmdAsync *pParms) { int i; - bool forceScan = true; /* * check the connection still valid @@ -796,12 +808,6 @@ checkSegmentAlive(CdbDispatchCmdAsync *pParms) ELOG_DISPATCHER_DEBUG("FTS testing connection %d of %d (%s)", i + 1, pParms->dispatchCount, segdbDesc->whoami); - if (forceScan) - { - FtsNotifyProber(); - forceScan = false; - } - if (!FtsIsSegmentUp(segdbDesc->segment_database_info)) { char *msg = PQerrorMessage(segdbDesc->conn); -- GitLab