提交 b8cbd451 编写于 作者: M Megvii Engine Team 提交者: Xinran Xu

fix(mgb/mm_server): return -1 when create_mm_server fails

GitOrigin-RevId: 117bb80c06388d7b7c7ab71a787a1f03849a3871
上级 b87a9a74
......@@ -63,9 +63,11 @@ def init_process_group(
set_default_device(mgb.comp_node("gpu" + str(dev)))
if rank == 0:
res = mgb.config.create_mm_server("0.0.0.0", master_port)
if res != master_port:
_master_port = mgb.config.create_mm_server("0.0.0.0", master_port)
if _master_port == -1:
raise Exception("Failed to start server on port {}".format(master_port))
else:
assert master_port > 0, "master_port must be specified for non-zero rank"
def is_distributed() -> bool:
......
......@@ -214,11 +214,14 @@ public:
std::make_unique<ZmqRpc::ZmqRpcServer>("tcp://" + server_addr, port,
std::move(service));
port = server->port();
if (port == -1) {
return -1;
}
auto full_srv_addr = ssprintf("%s:%d", server_addr.c_str(), port);
server->run();
auto ins = m_addr2server.emplace(
full_srv_addr, ServerInfo{std::move(server)});
mgb_assert(ins.second);
return port;
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册