From c0f94aef0d27cf5323d8cd74c8db32d7ddd36989 Mon Sep 17 00:00:00 2001 From: tangwei12 Date: Wed, 16 Oct 2019 16:49:08 +0800 Subject: [PATCH] fix bug with heart beat, test=develop (#20658) --- paddle/fluid/operators/distributed/heart_beat_monitor.h | 9 +-------- .../fluid/operators/distributed/request_handler_impl.cc | 6 +++++- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/paddle/fluid/operators/distributed/heart_beat_monitor.h b/paddle/fluid/operators/distributed/heart_beat_monitor.h index 639785ba513..211e40757fc 100644 --- a/paddle/fluid/operators/distributed/heart_beat_monitor.h +++ b/paddle/fluid/operators/distributed/heart_beat_monitor.h @@ -84,14 +84,7 @@ class HeartBeatMonitor { be_monitored_var); } - static HeartBeatMonitor* GetInstance() { - if (monitor_ == nullptr) { - PADDLE_THROW( - "HeartBeatMonitor is not inited, call " - "HeartBeatMonitor::Init first"); - } - return monitor_.get(); - } + static HeartBeatMonitor* GetInstance() { return monitor_.get(); } void Stop() { running_ = false; diff --git a/paddle/fluid/operators/distributed/request_handler_impl.cc b/paddle/fluid/operators/distributed/request_handler_impl.cc index 96098a4e226..9a7da5f8f92 100644 --- a/paddle/fluid/operators/distributed/request_handler_impl.cc +++ b/paddle/fluid/operators/distributed/request_handler_impl.cc @@ -53,7 +53,11 @@ bool RequestSendHandler::Handle(const std::string& varname, rpc_server_->IncreaseBatchBarrier(kRequestSend); } else if (varname == COMPLETE_MESSAGE) { VLOG(3) << "sync: recv complete message"; - HeartBeatMonitor::GetInstance()->Update(trainer_id, "", COMPLETED); + + if (HeartBeatMonitor::GetInstance() != nullptr) { + HeartBeatMonitor::GetInstance()->Update(trainer_id, "", COMPLETED); + } + rpc_server_->Complete(); } else { // Async -- GitLab