提交 b8577266 编写于 作者: X Xi Chen

Change pserver to use regular docker, plus some other tweaks

上级 f3a55f21
...@@ -478,6 +478,9 @@ def kickoff_pserver(host, pserver_endpoints_str): ...@@ -478,6 +478,9 @@ def kickoff_pserver(host, pserver_endpoints_str):
TASK_NAME=args.task_name, TASK_NAME=args.task_name,
COMMAND=args.pserver_command, COMMAND=args.pserver_command,
TRAINER_COUNT=args.trainer_count, TRAINER_COUNT=args.trainer_count,
TRAINER_INDEX=0,
# pserver cannot be started by binding to 0.0.0.0:port;
# the container has to run with docker --network="host" and use the host IP to make this work
SERVER_ENDPOINT=host + ":" + str(args.pserver_port), SERVER_ENDPOINT=host + ":" + str(args.pserver_port),
MASTER_ENDPOINT=args.master_server_ip + ":" + MASTER_ENDPOINT=args.master_server_ip + ":" +
str(args.master_server_port)) str(args.master_server_port))
...@@ -588,7 +591,7 @@ def start_server(args): ...@@ -588,7 +591,7 @@ def start_server(args):
logging.info("Received request to return status") logging.info("Received request to return status")
with open(args.log_path + "master.log", "r") as logfile: with open(args.log_path + "master.log", "r") as logfile:
self.wfile.write(logfile.read().strip()) self.wfile.write(logfile.read().strip())
elif request_path == "/list_logs": elif request_path == "/list_logs" or request_path == "/logs":
self._set_headers() self._set_headers()
self.wfile.write("\n".join(log_files)) self.wfile.write("\n".join(log_files))
elif "/log/" in request_path: elif "/log/" in request_path:
......
#!/bin/bash #!/bin/bash
nvidia-docker run -i -p {PSERVER_PORT}:{PSERVER_PORT} -e "SERVER_ENDPOINT={SERVER_ENDPOINT}" -e "MASTER_ENDPOINT={MASTER_ENDPOINT}" -e "TASK_NAME={TASK_NAME}" -e "TRAINING_ROLE=PSERVER" -e "TRAINERS={TRAINER_COUNT}" -e "PSERVER_HOSTS={PSERVER_HOSTS}" -e "PSERVERS={PSERVER_HOSTS}" {DOCKER_IMAGE} {COMMAND} docker run --network="host" -i -p {PSERVER_PORT}:{PSERVER_PORT} -e "SERVER_ENDPOINT={SERVER_ENDPOINT}" -e "MASTER_ENDPOINT={MASTER_ENDPOINT}" -e "TASK_NAME={TASK_NAME}" -e "TRAINER_INDEX={TRAINER_INDEX}" -e "TRAINING_ROLE=PSERVER" -e "TRAINER_COUNT={TRAINER_COUNT}" -e "TRAINERS={TRAINER_COUNT}" -e "PSERVER_HOSTS={PSERVER_HOSTS}" -e "PSERVERS={PSERVER_HOSTS}" {DOCKER_IMAGE} {COMMAND} --device CPU
\ No newline at end of file \ No newline at end of file
#!/bin/bash #!/bin/bash
nvidia-docker run -i -e "MASTER_ENDPOINT={MASTER_ENDPOINT}" -e "TASK_NAME={TASK_NAME}" -e "TRAINER_COUNT={TRAINER_COUNT}" -e "TRAINER_INDEX={TRAINER_INDEX}" -e "TRAINING_ROLE=TRAINER" -e "PSERVER_HOSTS={PSERVER_HOSTS}" {DOCKER_IMAGE} {COMMAND} nvidia-docker run -i -e "MASTER_ENDPOINT={MASTER_ENDPOINT}" -e "TASK_NAME={TASK_NAME}" -e "TRAINER_COUNT={TRAINER_COUNT}" -e "TRAINERS={TRAINER_COUNT}" -e "TRAINER_INDEX={TRAINER_INDEX}" -e "TRAINING_ROLE=TRAINER" -e "PSERVER_HOSTS={PSERVER_HOSTS}" {DOCKER_IMAGE} {COMMAND} --device GPU
\ No newline at end of file \ No newline at end of file
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册