Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into sub_sequence_op

7bb26805 · wanghaox · 1d95173c · 08bc08d6 · 7bb26805 · 7bb26805
4 changed files
--- a/benchmark/paddle/image/run_mkldnn.sh
+++ b/benchmark/paddle/image/run_mkldnn.sh
 set -e
 function train() {
-  unset OMP_NUM_THREADS MKL_NUM_THREADS
+  unset OMP_NUM_THREADS MKL_NUM_THREADS OMP_DYNAMIC KMP_AFFINITY
-  export OMP_DYNAMIC="FALSE"
-  export KMP_AFFINITY="granularity=fine,compact,0,0"
  topology=$1
  layer_num=$2
  bs=$3
@@ -14,8 +12,6 @@ function train() {
  elif [ $4 == "False" ]; then
    thread=`nproc`
    # each trainer_count use only 1 core to avoid conflict
-    export OMP_NUM_THREADS=1
-    export MKL_NUM_THREADS=1
    log="logs/${topology}-${layer_num}-${thread}mklml-${bs}.log"
  else
    echo "Wrong input $3, use True or False."

--- a/paddle/capi/Main.cpp
+++ b/paddle/capi/Main.cpp
@@ -29,6 +29,9 @@ static void initPaddle(int argc, char** argv) {
 extern "C" {
 paddle_error paddle_init(int argc, char** argv) {
+  static bool isInit = false;
+  if (isInit) return kPD_NO_ERROR;
  std::vector<char*> realArgv;
  realArgv.reserve(argc + 1);
  realArgv.push_back(strdup(""));
@@ -37,6 +40,7 @@ paddle_error paddle_init(int argc, char** argv) {
  }
  initPaddle(argc + 1, realArgv.data());
  free(realArgv[0]);
+  isInit = true;
  return kPD_NO_ERROR;
 }
 }
--- a/paddle/scripts/submit_local.sh.in
+++ b/paddle/scripts/submit_local.sh.in
@@ -43,6 +43,54 @@ function ver2num() {
  set +e
 }
+# Export default KMP_AFFINITY and OMP_DYNAMIC values chosen from the
+# Hyper-Threading status reported by lscpu. A variable that is already set
+# to a non-empty value in the environment is left untouched.
+# Does nothing when both MKLDNN and MKLML are disabled (the @WITH_*@
+# placeholders are presumably substituted at configure time -- confirm).
+function cpu_config() {
+  # NOTE: the space before the closing "]" is required; without it the test
+  # builtin fails with "missing ]" and this early return never triggers.
+  if [ "@WITH_MKLDNN@" == "OFF" ] && [ "@WITH_MKLML@" == "OFF" ]; then
+    return 0
+  fi
+  # Value of the lscpu line containing "per core"; 1 means HT is off.
+  ht=`lscpu |grep "per core"|awk -F':' '{print $2}'|xargs`
+  # Quoted string comparison so that an empty result (e.g. lscpu not
+  # available) cannot break the test itself; it falls through to the
+  # HT-on defaults below.
+  if [ "$ht" == "1" ]; then # HT is OFF
+    if [ -z "$KMP_AFFINITY" ]; then
+      export KMP_AFFINITY="granularity=fine,compact,0,0"
+    fi
+    if [ -z "$OMP_DYNAMIC" ]; then
+      export OMP_DYNAMIC="FALSE"
+    fi
+  else # HT is ON
+    if [ -z "$KMP_AFFINITY" ]; then
+      export KMP_AFFINITY="granularity=fine,compact,1,0"
+    fi
+    if [ -z "$OMP_DYNAMIC" ]; then
+      export OMP_DYNAMIC="True"
+    fi
+  fi
+}
+# Export default OMP_NUM_THREADS and MKL_NUM_THREADS, computed as
+# (number of processors / trainer_count) with a minimum of 1. A variable
+# that is already set to a non-empty value is left untouched.
+# Does nothing when both MKLDNN and MKLML are disabled.
+# Arguments: the full command line, which is searched for a
+# trainer_count option followed by a number.
+function threads_config() {
+  # NOTE: the space before the closing "]" is required; without it the test
+  # builtin fails with "missing ]" and this early return never triggers.
+  if [ "@WITH_MKLDNN@" == "OFF" ] && [ "@WITH_MKLML@" == "OFF" ]; then
+    return 0
+  fi
+  processors=`grep "processor" /proc/cpuinfo|sort -u|wc -l`
+  # Keep only the last trainer_count occurrence so the result is always a
+  # single number, even if the option is passed more than once.
+  trainers=`grep -Eo 'trainer_count.[0-9]+' <<< "$@" |grep -Eo '[0-9]+'|tail -n 1|xargs`
+  # Default to one trainer when the option is absent; also treat 0 as 1 so
+  # the division below cannot fail.
+  if [ -z "$trainers" ] || [ "$trainers" -eq 0 ]; then
+    trainers=1
+  fi
+  threads=$((processors / trainers))
+  # More trainers than processors: still give each trainer one thread.
+  if [ "$threads" -eq 0 ]; then
+    threads=1
+  fi
+  if [ -z "$OMP_NUM_THREADS" ]; then
+    export OMP_NUM_THREADS=$threads
+  fi
+  if [ -z "$MKL_NUM_THREADS" ]; then
+    export MKL_NUM_THREADS=$threads
+  fi
+}
 PADDLE_CONF_HOME="$HOME/.config/paddle"
 mkdir -p ${PADDLE_CONF_HOME}
@@ -92,9 +140,13 @@ else:
  sys.exit(0)
 EOF
+cpu_config
+# echo $KMP_AFFINITY $OMP_DYNAMIC
 case "$1" in
    "train")
+        threads_config $@
+        # echo $OMP_NUM_THREADS $MKL_NUM_THREADS
        ${DEBUGGER} $PADDLE_BIN_PATH/paddle_trainer ${@:2}
        ;;
    "merge_model")

--- a/python/paddle/v2/__init__.py
+++ b/python/paddle/v2/__init__.py
@@ -76,6 +76,31 @@ def init(**kwargs):
    for key in args_dict.keys():
        args.append('--%s=%s' % (key, str(args_dict[key])))
+    # auto set cpu environment
+    def set_env(key, value):
+        '''If the key has not been set in the environment, set it with value.'''
+        assert isinstance(key, str)
+        assert isinstance(value, str)
+        envset = os.environ.get(key)
+        if envset is None:
+            os.environ[key] = value
+    ht = os.popen("lscpu |grep \"per core\"|awk -F':' '{print $2}'|xargs")
+    ht = int(ht.read())
+    if ht == 1:  # ht is off
+        set_env("OMP_DYNAMIC", "false")
+        set_env("KMP_AFFINITY", "granularity=fine,compact,0,0")
+    else:
+        set_env("OMP_DYNAMIC", "true")
+        set_env("KMP_AFFINITY", "granularity=fine,compact,1,0")
+    processors = os.popen("grep \"processor\" /proc/cpuinfo|sort -u|wc -l")
+    processors = int(processors.read())
+    trainers = kwargs.get('trainer_count', 1)
+    threads = processors / trainers
+    threads = '1' if threads < 1 else str(threads)
+    set_env("OMP_NUM_THREADS", threads)
+    set_env("MKL_NUM_THREADS", threads)
    if 'use_gpu' in kwargs:
        cp.g_command_config_args['use_gpu'] = kwargs['use_gpu']
    if 'use_mkldnn' in kwargs: