From 739734fb85d7e941717d51334ef673ac2b8ec7f6 Mon Sep 17 00:00:00 2001 From: Jimmy Yih Date: Mon, 23 Jan 2017 17:57:35 -0800 Subject: [PATCH] Async gang recreation should PQconnectPoll on bad fd poll. There are segment recovery scenarios where poll() reports revents of POLLNVAL while the requested events are POLLOUT. Because the two masks do not intersect, PQconnectPoll is never called and the loop keeps spinning until the default 10-minute timeout is reached. Because of this, the FTS portion at the bottom of the createGang_async() function does not get executed correctly. This patch also checks the fd's poll revents for POLLERR, POLLHUP, and POLLNVAL and calls PQconnectPoll in those cases, so that the polling status can correctly update from PGRES_POLLING_WRITING to PGRES_POLLING_FAILED. The loop can then exit and the FTS handling is executed. --- src/backend/cdb/dispatcher/cdbgang_async.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/backend/cdb/dispatcher/cdbgang_async.c b/src/backend/cdb/dispatcher/cdbgang_async.c index 68d54204a5..61bc3e4442 100644 --- a/src/backend/cdb/dispatcher/cdbgang_async.c +++ b/src/backend/cdb/dispatcher/cdbgang_async.c @@ -239,7 +239,8 @@ create_gang_retry: Assert(PQsocket(segdbDesc->conn) > 0); Assert(PQsocket(segdbDesc->conn) == fds[currentFdNumber].fd); - if (fds[currentFdNumber].revents & fds[currentFdNumber].events) + if (fds[currentFdNumber].revents & fds[currentFdNumber].events || + fds[currentFdNumber].revents & (POLLERR | POLLHUP | POLLNVAL)) pollingStatus[i] = PQconnectPoll(segdbDesc->conn); currentFdNumber++; -- GitLab