未验证 提交 0bddb951 编写于 作者: T tangwei12 提交者: GitHub

fix async mode, test=develop (#21367)

上级 94bef035
@@ -464,23 +464,21 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
f(request_get_no_barrier_handler_.get()); f(request_get_no_barrier_handler_.get());
f(request_notify_handler_.get()); f(request_notify_handler_.get());
// register SIGINT(from ctrl+C) and SIGTERM(from kill) signal handlers
signal(SIGINT, SignalHandler::StopAndExit);
signal(SIGTERM, SignalHandler::StopAndExit);
if (sync_mode) {
// start the server listening after all member initialized. // start the server listening after all member initialized.
server_thread_.reset(new std::thread(RunServer, rpc_service_)); server_thread_.reset(new std::thread(RunServer, rpc_service_));
VLOG(3) << "wait server thread to become ready..."; VLOG(3) << "wait server thread to become ready...";
rpc_service_->WaitServerReady(); rpc_service_->WaitServerReady();
// register SIGINT(from ctrl+C) and SIGTERM(from kill) signal handlers
signal(SIGINT, SignalHandler::StopAndExit);
signal(SIGTERM, SignalHandler::StopAndExit);
// Cache the type of the received vars as `sparse_vars_` and `dense_vars_`
// so that we can reset them at the end of each iteration.
// NOTE: only used in sync update
CacheVarsType(inputs, recv_scope); CacheVarsType(inputs, recv_scope);
// Write to a file of server selected port for python use. // Write to a file of server selected port for python use.
SavePort(); SavePort();
if (sync_mode) {
RunSyncLoop(&executor, program, &recv_scope, &dev_ctx, RunSyncLoop(&executor, program, &recv_scope, &dev_ctx,
prefetch_block_id_list, checkpoint_block_id); prefetch_block_id_list, checkpoint_block_id);
} else { } else {
@@ -498,6 +496,15 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
split(grad_to_block_id_str[0], ':', &pieces); split(grad_to_block_id_str[0], ':', &pieces);
distributed::HeartBeatMonitor::Init(fan_in, pserver_id == 0, pieces[0]); distributed::HeartBeatMonitor::Init(fan_in, pserver_id == 0, pieces[0]);
} }
// start the server listening after all member initialized.
server_thread_.reset(new std::thread(RunServer, rpc_service_));
VLOG(3) << "wait server thread to become ready...";
rpc_service_->WaitServerReady();
// Write to a file of server selected port for python use.
SavePort();
RunAsyncLoop(&executor, program, &recv_scope); RunAsyncLoop(&executor, program, &recv_scope);
} }
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册