提交 19695040 编写于 作者: A Alexey Milovidov

Fixed leader election [#METR-20132].

上级 48d502a2
......@@ -19,13 +19,36 @@ public:
*/
/** Constructor: creates this participant's election node, cleans up stale nodes
  * left by the same session, and starts the background thread.
  *
  * path_       - ZooKeeper path under which the "leader_election-" nodes are created.
  * zookeeper_  - ZooKeeper client; stored by the `zookeeper` member.
  * handler_    - stored in `handler`; presumably invoked on becoming leader (the call site is not visible here).
  * identifier_ - passed to EphemeralNodeHolder::createSequential; presumably stored as the node's data - TODO confirm.
  *
  * NOTE(review): the two lines following the first initializer line both end the initializer list
  * with `log(...)`. This looks like the removed and the added line of a diff hunk shown together;
  * only one of them can be present in the real file - confirm against the repository.
  */
LeaderElection(const std::string & path_, ZooKeeper & zookeeper_, LeadershipHandler handler_, const std::string & identifier_ = "")
: path(path_), zookeeper(zookeeper_), handler(handler_), identifier(identifier_),
shutdown(false), state(WAITING_LEADERSHIP), log(&Logger::get("LeaderElection"))
log(&Logger::get("LeaderElection"))
{
/// Create our own node with the "leader_election-" prefix under `path`.
node = EphemeralNodeHolder::createSequential(path + "/leader_election-", zookeeper, identifier);
std::string node_path = node->getPath();
/// Keep only the last path component: children returned by getChildren are compared against it below.
node_name = node_path.substr(node_path.find_last_of('/') + 1);
/** If there are nodes with the same ephemeralOwner, remove them.
* Such nodes could be left behind after an unsuccessful removal if the session did not expire in the meantime.
*/
zkutil::Stat node_stat;
zookeeper.get(node_path, &node_stat);
Strings brothers = zookeeper.getChildren(path);
for (const auto & brother : brothers)
{
/// Skip the node we have just created.
if (brother == node_name)
continue;
zkutil::Stat brother_stat;
std::string brother_path = path + "/" + brother;
zookeeper.get(brother_path, &brother_stat);
/// Same ephemeralOwner as our own node: treated as a leftover of this session and removed.
if (brother_stat.ephemeralOwner == node_stat.ephemeralOwner)
{
LOG_WARNING(log, "Found obsolete ephemeral node from same session, removing: " + brother_path);
zookeeper.tryRemoveWithRetries(brother_path);
}
}
/// Start the thread only after the cleanup above has finished.
thread = std::thread(&LeaderElection::threadFunction, this);
}
......@@ -78,10 +101,10 @@ private:
/// NOTE(review): `shutdown` and `state` are each declared twice below. This looks like the removed
/// and the added line of a diff hunk shown together; presumably only the forms with default
/// member initializers remain in the real file - confirm against the repository.

/// Last path component of this participant's node (assigned in the constructor).
std::string node_name;
/// Thread running threadFunction (started at the end of the constructor).
std::thread thread;
/// Presumably a stop flag for the thread; its readers and writers are not visible here - TODO confirm.
volatile bool shutdown;
volatile bool shutdown = false;
/// Event object; what signals it and what waits on it is not visible here.
zkutil::EventPtr event = new Poco::Event();
/// Current election state; starts as WAITING_LEADERSHIP.
State state;
State state = WAITING_LEADERSHIP;
/// Logger obtained via Logger::get("LeaderElection") in the constructor.
Logger * log;
......@@ -111,24 +134,9 @@ private:
success = true;
}
catch (const DB::Exception & e)
{
LOG_ERROR(log, "Exception in LeaderElection: Code: " << e.code() << ". " << e.displayText() << std::endl
<< std::endl
<< "Stack trace:" << std::endl
<< e.getStackTrace().toString());
}
catch (const Poco::Exception & e)
{
LOG_ERROR(log, "Poco::Exception in LeaderElection: " << e.code() << ". " << e.displayText());
}
catch (const std::exception & e)
{
LOG_ERROR(log, "std::exception in LeaderElection: " << e.what());
}
catch (...)
{
LOG_ERROR(log, "Unknown exception in LeaderElection");
DB::tryLogCurrentException("LeaderElection");
}
if (!success)
......
......@@ -395,16 +395,38 @@ public:
{
try
{
/** Важно, что в случае недоступности ZooKeeper, делаются повторные попытки удалить ноду, пока не истечёт сессия.
/** Важно, что в случае недоступности ZooKeeper, делаются повторные попытки удалить ноду.
* Иначе возможна ситуация, когда объект EphemeralNodeHolder уничтожен,
* но сессия восстановится в течение session timeout, и эфемерная нода в ZooKeeper останется ещё надолго.
* А это может сломать механизм leader election и блокировок.
* Но см. ниже - на самом деле, такая ситуация всё равно возможна.
*/
zookeeper.tryRemoveWithRetries(path);
}
catch (const KeeperException & e)
{
LOG_ERROR(zookeeper.log, "~EphemeralNodeHolder(): " << e.displayText());
/** На самом деле, сессия может ещё жить после этой ошибки.
* Поэтому не стоит рассчитывать, что эфемерная нода действительно будет удалена.
Ridiculously Long Delay to Expire
When disconnects do happen, the common case should be a very quick
reconnect to another server, but an extended network outage may
introduce a long delay before a client can reconnect to the ZooKeeper
service. Some developers wonder why the ZooKeeper client library
doesn’t simply decide at some point (perhaps twice the session
timeout) that enough is enough and kill the session itself.
There are two answers to this. First, ZooKeeper leaves this kind of
policy decision up to the developer. Developers can easily implement
such a policy by closing the handle themselves. Second, when a
ZooKeeper ensemble goes down, time freezes. Thus, when the ensemble is
brought back up, session timeouts are restarted. If processes using
ZooKeeper hang in there, they may find out that the long timeout was
due to an extended ensemble failure that has recovered and pick right
up where they left off without any additional startup delay.
ZooKeeper: Distributed Process Coordination, p. 118
*/
}
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册