From fd616fadc24e5d9ffe65dfaf261b7b264a13ef9c Mon Sep 17 00:00:00 2001 From: tangwei12 Date: Wed, 30 Sep 2020 13:40:45 +0800 Subject: [PATCH] reopen heartbeat ut (#27684) --- .../distributed/heart_beat_monitor.cc | 2 +- .../distributed/heart_beat_monitor_test.cc | 50 +++++++++---------- 2 files changed, 25 insertions(+), 27 deletions(-) diff --git a/paddle/fluid/operators/distributed/heart_beat_monitor.cc b/paddle/fluid/operators/distributed/heart_beat_monitor.cc index fda5fd09a4e..9f537f53348 100644 --- a/paddle/fluid/operators/distributed/heart_beat_monitor.cc +++ b/paddle/fluid/operators/distributed/heart_beat_monitor.cc @@ -84,7 +84,7 @@ void HeartBeatMonitor::LostWorkerMonitor() { } } - std::this_thread::sleep_for(std::chrono::milliseconds(30 * 1000)); + std::this_thread::sleep_for(std::chrono::milliseconds(10 * 1000)); } VLOG(1) << "worker heartbeat monitor stopped, thread exit"; } diff --git a/paddle/fluid/operators/distributed/heart_beat_monitor_test.cc b/paddle/fluid/operators/distributed/heart_beat_monitor_test.cc index 7c2c0fbff11..8505023f63a 100644 --- a/paddle/fluid/operators/distributed/heart_beat_monitor_test.cc +++ b/paddle/fluid/operators/distributed/heart_beat_monitor_test.cc @@ -23,32 +23,30 @@ namespace distributed { void run(HeartBeatMonitor* monitor) { monitor->LostWorkerMonitor(); } TEST(HeartBeatMonitor, All) { - // (tangwei12) fix it soon. 
- return; - // int trainers = 10; - // int pserver_id = 0; - // std::string var = "nce_w@GRAD.block0"; - // std::string var2 = "nce_w@GRAD.block2"; - // - // HeartBeatMonitor::Init(trainers, pserver_id == 0, var); - // - // auto* monitor = HeartBeatMonitor::GetInstance(); - // - // std::vector ids{1, 3, 5, 7}; - // - // for (auto& id : ids) { - // monitor->Update(id, var, RUNNING); - // } - // - // monitor->Update(9, var2, RUNNING); - // monitor->Update(2, var, COMPLETED); - // - // std::thread t(run, monitor); - // t.detach(); - // - // std::this_thread::sleep_for(std::chrono::milliseconds(45 * 1000)); - // - // monitor->Stop(); + int trainers = 10; + int pserver_id = 0; + std::string var = "nce_w@GRAD.block0"; + std::string var2 = "nce_w@GRAD.block2"; + + HeartBeatMonitor::Init(trainers, pserver_id == 0, var); + + auto* monitor = HeartBeatMonitor::GetInstance(); + + std::vector ids{1, 3, 5, 7}; + + for (auto& id : ids) { + monitor->Update(id, var, RUNNING); + } + + monitor->Update(9, var2, RUNNING); + monitor->Update(2, var, COMPLETED); + + std::thread t(run, monitor); + t.detach(); + + std::this_thread::sleep_for(std::chrono::milliseconds(15 * 1000)); + + monitor->Stop(); } } // namespace distributed -- GitLab