cluster_train.sh 1.2 KB
Newer Older
H
hutuxian 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56
#!/bin/bash

#export GLOG_v=30
#export GLOG_logtostderr=1

python -u cluster_train.py \
--config_path 'data/config.txt' \
--train_dir 'data/paddle_train.txt' \
--batch_size 32 \
--epoch_num 100 \
--use_cuda 0 \
--parallel 0 \
--role pserver \
--endpoints 127.0.0.1:6000,127.0.0.1:6001 \
--current_endpoint 127.0.0.1:6000 \
--trainers 2 \
> pserver0.log 2>&1 &

python -u cluster_train.py \
--config_path 'data/config.txt' \
--train_dir 'data/paddle_train.txt' \
--batch_size 32 \
--epoch_num 100 \
--use_cuda 0 \
--parallel 0 \
--role pserver \
--endpoints 127.0.0.1:6000,127.0.0.1:6001 \
--current_endpoint 127.0.0.1:6001 \
--trainers 2 \
> pserver1.log 2>&1 &

python -u cluster_train.py \
--config_path 'data/config.txt' \
--train_dir 'data/paddle_train.txt' \
--batch_size 32 \
--epoch_num 100 \
--use_cuda 0 \
--parallel 0 \
--role trainer \
--endpoints 127.0.0.1:6000,127.0.0.1:6001 \
--trainers 2 \
--trainer_id 0 \
> trainer0.log 2>&1 &

python -u cluster_train.py \
--config_path 'data/config.txt' \
--train_dir 'data/paddle_train.txt' \
--batch_size 32 \
--epoch_num 100 \
--use_cuda 0 \
--parallel 0 \
--role trainer \
--endpoints 127.0.0.1:6000,127.0.0.1:6001 \
--trainers 2 \
--trainer_id 1 \
> trainer1.log 2>&1 &