From 53a509daaff499edd4d9e9633e51dfb4fae24def Mon Sep 17 00:00:00 2001
From: Xin Pan
Date: Thu, 7 Jun 2018 10:08:38 +0800
Subject: [PATCH] make benchmark really working (#11215)

---
 benchmark/fluid/README.md              |  2 ++
 benchmark/fluid/run_fluid_benchmark.sh | 52 ++++++++++++++++++++++++++
 2 files changed, 54 insertions(+)
 create mode 100644 benchmark/fluid/run_fluid_benchmark.sh

diff --git a/benchmark/fluid/README.md b/benchmark/fluid/README.md
index 1b0c7dce8..d3e157794 100644
--- a/benchmark/fluid/README.md
+++ b/benchmark/fluid/README.md
@@ -29,9 +29,11 @@ Currently supported `--model` argument include:
     You can choose to use GPU/CPU training. With GPU training, you can specify
     `--gpus <gpu_num>` to run multi GPU training.
 * Run distributed training with parameter servers:
+    * see run_fluid_benchmark.sh as an example.
     * start parameter servers:
         ```bash
         PADDLE_TRAINING_ROLE=PSERVER PADDLE_PSERVER_PORT=7164 PADDLE_PSERVER_IPS=127.0.0.1 PADDLE_TRAINERS=1 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model mnist --device GPU --update_method pserver
+        sleep 15
         ```
     * start trainers:
         ```bash
diff --git a/benchmark/fluid/run_fluid_benchmark.sh b/benchmark/fluid/run_fluid_benchmark.sh
new file mode 100644
index 000000000..4309a3126
--- /dev/null
+++ b/benchmark/fluid/run_fluid_benchmark.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+#
+# Example local distributed run of the fluid benchmark: one CPU parameter
+# server plus two GPU trainers (two GPUs each), all on 127.0.0.1.
+# Run from benchmark/fluid so that fluid_benchmark.py is found.
+
+# Cluster settings shared by the parameter server and both trainers.
+export PADDLE_PSERVER_PORT=7164
+export PADDLE_PSERVER_IPS=127.0.0.1
+export PADDLE_TRAINERS=2
+export PADDLE_CURRENT_IP=127.0.0.1
+
+pserver_pid=
+trainer_pids=()
+
+# Stop whatever is still running when the script exits or is interrupted, so
+# an aborted run does not keep the pserver port or the GPUs busy.
+cleanup() {
+  local pid
+  for pid in "${pserver_pid}" "${trainer_pids[@]}"; do
+    [[ -n "${pid}" ]] && kill "${pid}" 2>/dev/null
+  done
+}
+trap cleanup EXIT
+trap 'exit 130' INT TERM
+
+PADDLE_TRAINING_ROLE=PSERVER PADDLE_TRAINER_ID=0 \
+  python fluid_benchmark.py --model resnet --device CPU \
+  --update_method pserver --iterations=10000 &
+pserver_pid=$!
+
+# Give the parameter server a head start before the trainers connect.
+sleep 15
+
+CUDA_VISIBLE_DEVICES=0,1 PADDLE_TRAINING_ROLE=TRAINER PADDLE_TRAINER_ID=0 \
+  python fluid_benchmark.py --model resnet --device GPU \
+  --update_method pserver --iterations=10000 --gpus 2 &
+trainer_pids+=("$!")
+
+CUDA_VISIBLE_DEVICES=2,3 PADDLE_TRAINING_ROLE=TRAINER PADDLE_TRAINER_ID=1 \
+  python fluid_benchmark.py --model resnet --device GPU \
+  --update_method pserver --iterations=10000 --gpus 2 &
+trainer_pids+=("$!")
+
+# Block until both trainers finish and exit non-zero if either one failed.
+# Only the trainers are awaited; the EXIT trap then stops the parameter
+# server in case it is still serving.
+status=0
+for pid in "${trainer_pids[@]}"; do
+  wait "${pid}" || status=$?
+done
+exit "${status}"
-- 
GitLab