#!/bin/bash
set -e
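# Benchmarks PaddlePaddle image models with and without MKL-DNN and writes
# per-run timing logs under logs/. Assumes vgg.py and resnet.py are in the
# working directory and that `paddle` is on the PATH.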

# train <topology> <layer_num> <batch_size> <use_mkldnn>
# Runs one `paddle train --job=time` benchmark and tees its output into
# logs/, e.g. `train vgg 19 64 True` writes logs/vgg-19-mkldnn-64.log.
function train() {
  unset OMP_NUM_THREADS MKL_NUM_THREADS
  topology=$1
  layer_num=$2
  bs=$3
  use_mkldnn=$4
  if [ "$use_mkldnn" == "True" ]; then
    thread=1
    log="logs/${topology}-${layer_num}-mkldnn-${bs}.log"
  elif [ "$use_mkldnn" == "False" ]; then
    thread=`nproc`
    # each trainer uses only one core to avoid contention between trainers
    export OMP_NUM_THREADS=1
    export MKL_NUM_THREADS=1
    log="logs/${topology}-${layer_num}-${thread}mklml-${bs}.log"
  else
    echo "Wrong input $4, use True or False."
    exit 1
  fi
  args="batch_size=${bs},layer_num=${layer_num}"
  config="${topology}.py"
  paddle train --job=time \
    --config=$config \
    --use_mkldnn=$use_mkldnn \
    --use_gpu=False \
    --trainer_count=$thread \
    --log_period=10 \
    --test_period=100 \
    --config_args=$args \
    2>&1 | tee ${log}
}
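# Note: with use_mkldnn=True a single trainer is launched and the OMP/MKL
# thread counts are left unset, presumably so MKL-DNN's internal OpenMP pool
# can use all cores; the MKLML baseline instead runs one trainer per core
# with OMP/MKL pinned to one thread each, so both settings use the whole machine.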

if [ ! -d "train.list" ]; then
  echo " " > train.list
fi
if [ ! -d "logs" ]; then
  mkdir logs
fi

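# Pick an Intel OpenMP affinity setting based on whether Hyper-Threading looks
# enabled: compact,1,0 places consecutive threads on separate physical cores
# before reusing sibling hyper-threads, while compact,0,0 packs threads onto
# consecutive hardware threads and OMP_DYNAMIC=FALSE keeps the thread count fixed.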
total_cores=`ls -l /sys/devices/system/cpu/ | grep "cpu[0-9]*$" | wc -l`
online_cores=`cat /sys/devices/system/cpu/cpu*/online | grep -o '1' | wc -l`
if [ $online_cores -eq $total_cores ]; then
  echo "Hyper Threading is ON"
  export KMP_AFFINITY="granularity=fine,compact,1,0"
else
  echo "Hyper Threading is OFF"
  export OMP_DYNAMIC="FALSE"
  export KMP_AFFINITY="granularity=fine,compact,0,0"
fi

for use_mkldnn in True False; do
  for batchsize in 64 128 256; do
    # vgg-19 and vgg-16
    train vgg 19 $batchsize $use_mkldnn
    train vgg 16 $batchsize $use_mkldnn

    # resnet-50, 101 and 152
    train resnet 50  $batchsize $use_mkldnn
    train resnet 101 $batchsize $use_mkldnn
    train resnet 152 $batchsize $use_mkldnn
  done
done
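# To benchmark a single configuration instead of the full sweep, call the
# function directly, e.g.:
#   train resnet 50 64 True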