#!/bin/bash
# This script benchmarks PaddlePaddle Fluid on a single thread and a single GPU.

mkdir -p logs
#export FLAGS_fraction_of_gpu_memory_to_use=0.0
export CUDNN_PATH=/paddle/cudnn_v5

# disable OpenMP and MKL parallelism
# https://github.com/PaddlePaddle/Paddle/issues/7199
export MKL_NUM_THREADS=1
export OMP_NUM_THREADS=1
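# Choose a thread-affinity setting based on whether hyper-threading is on:
# lscpu reports "Thread(s) per core" as 1 when HT is off. With HT off,
# compact,0,0 pins each OpenMP thread to its own core; with HT on, the
# permute value 1 in compact,1,0 spreads threads across physical cores
# before filling sibling hyper-threads.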
ht=$(lscpu | grep "per core" | awk -F':' '{print $2}' | xargs)
if [ "$ht" -eq 1 ]; then # HT is OFF
    if [ -z "$KMP_AFFINITY" ]; then
        export KMP_AFFINITY="granularity=fine,compact,0,0"
    fi
    if [ -z "$OMP_DYNAMIC" ]; then
        export OMP_DYNAMIC="FALSE"
    fi
else # HT is ON
    if [ -z "$KMP_AFFINITY" ]; then
        export KMP_AFFINITY="granularity=fine,compact,1,0"
    fi
fi
# disable multi-GPU: use only the first GPU even if more are available
export CUDA_VISIBLE_DEVICES=0
export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=$CUDNN_PATH:$LD_LIBRARY_PATH

# query only the GPU used by the benchmark
nohup stdbuf -oL nvidia-smi \
      --id=${CUDA_VISIBLE_DEVICES} \
      --query-gpu=timestamp \
      --query-compute-apps=pid,process_name,used_memory \
      --format=csv \
      --filename=mem.log  \
      -l 1 &
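# The monitor above samples once per second (-l 1) and writes CSV records
# to mem.log; it keeps running in the background alongside the benchmark
# runs below.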

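# For the runs below, --skip_batch_num appears to exclude the first few
# warm-up batches from the measured speed, and --iterations caps the number
# of batches each benchmark executes.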
# mnist
# mnist gpu mnist 128
FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \
               --model=mnist \
               --device=GPU \
               --batch_size=128 \
               --skip_batch_num=5 \
               --iterations=500 \
               2>&1 | tee -a logs/mnist_gpu_128.log

# vgg16
# gpu cifar10 128
FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \
               --model=vgg16 \
               --device=GPU \
               --batch_size=128 \
               --skip_batch_num=5 \
               --iterations=30 \
               2>&1 | tee -a logs/vgg16_gpu_128.log

# vgg16 gpu flowers 32
FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \
               --model=vgg16 \
               --device=GPU \
               --batch_size=32 \
               --data_set=flowers \
               --skip_batch_num=5 \
               --iterations=30 \
               2>&1 | tee -a logs/vgg16_gpu_flowers_32.log

# resnet50
# resnet50 gpu cifar10 128
FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \
               --model=resnet \
               --device=GPU \
               --batch_size=128 \
               --data_set=cifar10 \
               --skip_batch_num=5 \
               --iterations=30 \
               2>&1 | tee -a logs/resnet50_gpu_128.log

# resnet50 gpu flowers 64
FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \
               --model=resnet \
               --device=GPU \
               --batch_size=64 \
               --data_set=flowers \
               --skip_batch_num=5 \
               --iterations=30 \
               2>&1 | tee -a logs/resnet50_gpu_flowers_64.log

# lstm
# lstm gpu imdb 32  # TensorFlow only supports batch=32
FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \
               --model=stacked_dynamic_lstm \
               --device=GPU \
               --batch_size=32 \
               --skip_batch_num=5 \
               --iterations=30 \
               2>&1 | tee -a logs/lstm_gpu_32.log

# seq2seq
# seq2seq gpu wmt 128
FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \
               --model=machine_translation \
               --device=GPU \
               --batch_size=128 \
               --skip_batch_num=5 \
               --iterations=30 \
               2>&1 | tee -a logs/seq2seq_gpu_128.log