提交 35ce6ac2 编写于 作者: H heqiaozhi

add ps_instance doc

上级 caa6b596
...@@ -14,27 +14,36 @@ ...@@ -14,27 +14,36 @@
import helper as dist_helper import helper as dist_helper
import sys import sys
class PaddlePSInstance(object): class PaddlePSInstance(object):
"""
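PaddlePSInstance is used to create an instance that acts as a server or a worker.
Args:
server_worker_mode: 0 or 1; selects how ranks are divided between servers and workers (required, the constructor declares no default)
proc_per_node: number of processes per node (required, the constructor declares no default)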
Examples:
instance = PaddlePSInstance(1, 2)
"""
def __init__(self, server_worker_mode, proc_per_node):
    """
    Create the MPI helper, derive the worker/server counts and assign
    this process its role.

    Args:
        server_worker_mode: layout selector consumed by _set_nodetype();
            the visible code distinguishes the values 0 and 1.
        proc_per_node: number of processes per node; used to size the
            worker and server groups.
    """
    self.dh = dist_helper.MPIHelper()
    self._rankid = self.dh.get_rank()
    self._server_worker_mode = server_worker_mode
    self._proc_per_node = proc_per_node
    self._nodes = self.dh.get_size()
    self._ip = 0
    # Workers and servers each get half of nodes * proc_per_node. Floor
    # division keeps both counts integral on Python 3 as well as Python 2.
    self._worker_num = self._nodes * self._proc_per_node // 2
    self._server_num = self._nodes * self._proc_per_node // 2
    self._total_server_worker = self._worker_num + self._server_num
    self._node_type = None  # IDLE=-1, WORKER=1, SERVER=0
    # Role assignment must run before the communicator split, which
    # queries the role through is_server().
    self._set_nodetype()
    self._comm = None
    self._split_comm()
def _set_nodetype(self): def _set_nodetype(self):
if self._server_worker_mode == 0: if self._server_worker_mode == 0:
if self._rankid < self._server_num: if self._rankid < self._server_num:
self._node_type = 1 self._node_type = 1
elif self._rankid < self._total_server_worker: elif self._rankid < self._total_server_worker:
self._node_type = 0 self._node_type = 0
...@@ -46,13 +55,13 @@ class PaddlePSInstance(object): ...@@ -46,13 +55,13 @@ class PaddlePSInstance(object):
self._node_type = 0 self._node_type = 0
else: else:
self._node_type = 1 self._node_type = 1
else: else:
self._node_type = -1; self._node_type = -1
else: else:
self._node_type = -1 self._node_type = -1
#if self._rankid == 0: #if self._rankid == 0:
#print "node type: ", self._node_type #print "node type: ", self._node_type
def _split_comm(self): def _split_comm(self):
if self.is_server(): if self.is_server():
...@@ -62,45 +71,78 @@ class PaddlePSInstance(object): ...@@ -62,45 +71,78 @@ class PaddlePSInstance(object):
pass pass
def get_worker_index(self):
    """
    Return worker index

    In mode 0 the rank itself is the index; in every other mode the rank
    is floor-divided by the number of processes per node.

    Returns:
        int: zero-based index of this process among the workers.
    """
    if self._server_worker_mode == 0:
        # NOTE(review): in mode 0 _set_nodetype() gives the worker role to
        # the ranks below _server_num, so the rank already is a zero-based
        # worker index. The previous expression compared the rank with a
        # non-existent `server_num` attribute and could only raise
        # AttributeError -- confirm the intended rank layout.
        return self._rankid
    # Floor division keeps the index an int on Python 3 as well.
    return self._rankid // self._proc_per_node
def get_server_index(self):
    """
    Return server index

    In mode 0 the index is the rank minus the number of ranks that precede
    the servers; in every other mode the rank is floor-divided by the
    number of processes per node.

    Returns:
        int: zero-based index of this process among the servers.
    """
    # The rank is stored as `_rankid` by __init__; the former `rank_id`
    # attribute is never assigned and raised AttributeError on every call.
    if self._server_worker_mode == 0:
        # NOTE(review): in mode 0 _set_nodetype() gives the server role to
        # ranks in [_server_num, _total_server_worker), so the offset is
        # removed to obtain a zero-based index -- confirm the intended
        # rank layout.
        return self._rankid - self._server_num
    # Floor division keeps the index an int on Python 3 as well.
    return self._rankid // self._proc_per_node
def is_worker(self):
    """
    Return whether this instance is a worker.

    Returns:
        bool: True when the node type is 1 (the WORKER code).
    """
    return self._node_type == 1
def is_server(self):
    """
    Return whether this instance is a server.

    Returns:
        bool: True when the node type is 0 (the SERVER code).
    """
    return self._node_type == 0
def is_first_worker(self):
    """
    Return whether this instance is the first worker.

    The worker index is only queried when the instance is a worker.

    Returns:
        bool: True when the instance is a worker whose worker index is 0.
    """
    return self.is_worker() and 0 == self.get_worker_index()
def set_ip(self, ip):
    """
    Store the IP of this instance; gather_ips() later shares this value.

    Args:
        ip: value to record; it is stored as given, without validation.
    """
    self._ip = ip
def gather_ips(self):
    """
    Return the IPs of all servers and workers, gathered through MPI allgather.

    The gathered result is also kept on the instance as `_ips`.

    Returns:
        Whatever the helper communicator's allgather() returns for `_ip`.
    """
    self._ips = self.dh.comm.allgather(self._ip)
    return self._ips
def get_node_cnt(self):
    """
    Return the node count.

    Returns:
        The value obtained from the MPI helper's get_size() when the
        instance was constructed.
    """
    return self._nodes
def barrier_all(self):
    """
    Barrier across workers and servers.

    Calls barrier() on the MPI helper's communicator.
    """
    self.dh.comm.barrier()
def barrier_worker(self):
    """
    Barrier across workers only.

    Does nothing when this instance is not a worker.
    """
    if self.is_worker():
        # NOTE(review): _comm is presumably the role-specific communicator
        # prepared by _split_comm() -- confirm against that method.
        self._comm.barrier()
def finalize(self):
    """
    MPI finalize.

    Delegates to the MPI helper's finalize().
    """
    self.dh.finalize()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册