logger.info("Change selected_gpus into reletive values. --ips:{} "
"will change into relative_ips:{} according to your "
"CUDA_VISIBLE_DEVICES:{}".format(
gpus,res_gpus,cuda_visible_devices_list))
returnres_gpus
defdirect_start(args):
defdirect_start(args):
# run ps-cpu mode on paddlecloud, using given envs
# run ps-cpu mode on paddlecloud, using given envs
cmd=[sys.executable,"-u",args.training_script]+ \
cmd=[sys.executable,"-u",args.training_script]+ \
...
@@ -636,6 +665,7 @@ class ParameterServerLauncher(object):
...
@@ -636,6 +665,7 @@ class ParameterServerLauncher(object):
args.workers.split(",")
args.workers.split(",")
)==self.worker_num,"The worker_num and workers doesn't match. Expect workers endpoints num epual to worker_num, but received workers enpoint num: {} and worker_num {}".format(
)==self.worker_num,"The worker_num and workers doesn't match. Expect workers endpoints num epual to worker_num, but received workers enpoint num: {} and worker_num {}".format(
len(args.workers.split(",")),self.worker_num)
len(args.workers.split(",")),self.worker_num)
self.worker_endpoints=args.workers
self.worker_endpoints=args.workers
else:
else:
ports=get_ports(self.worker_num,self.server_num)
ports=get_ports(self.worker_num,self.server_num)
...
@@ -750,9 +780,9 @@ class ParameterServerLauncher(object):
...
@@ -750,9 +780,9 @@ class ParameterServerLauncher(object):