diff --git a/benchmark/paddle/image/run_mkldnn.sh b/benchmark/paddle/image/run_mkldnn.sh
index a4527e04968cf8c8c3c31d16f50bc3e28381f6d8..3cc779b48d082985f75ab1c053fbe262bc6d58aa 100755
--- a/benchmark/paddle/image/run_mkldnn.sh
+++ b/benchmark/paddle/image/run_mkldnn.sh
@@ -1,9 +1,7 @@
 set -e
 
 function train() {
-  unset OMP_NUM_THREADS MKL_NUM_THREADS
-  export OMP_DYNAMIC="FALSE"
-  export KMP_AFFINITY="granularity=fine,compact,0,0"
+  unset OMP_NUM_THREADS MKL_NUM_THREADS OMP_DYNAMIC KMP_AFFINITY
   topology=$1
   layer_num=$2
   bs=$3
@@ -14,8 +12,6 @@ function train() {
   elif [ $4 == "False" ]; then
     thread=`nproc`
     # each trainer_count use only 1 core to avoid conflict
-    export OMP_NUM_THREADS=1
-    export MKL_NUM_THREADS=1
     log="logs/${topology}-${layer_num}-${thread}mklml-${bs}.log"
   else
     echo "Wrong input $3, use True or False."
diff --git a/paddle/capi/Main.cpp b/paddle/capi/Main.cpp
index 78c43949dfe325d0e1a6ba10ae51cb7b858f6c52..bb8249a5511c089ec2f2263ff4cc290f0a5a8fce 100644
--- a/paddle/capi/Main.cpp
+++ b/paddle/capi/Main.cpp
@@ -29,6 +29,9 @@ static void initPaddle(int argc, char** argv) {
 
 extern "C" {
 paddle_error paddle_init(int argc, char** argv) {
+  static bool isInit = false;
+  if (isInit) return kPD_NO_ERROR;
+
   std::vector<char*> realArgv;
   realArgv.reserve(argc + 1);
   realArgv.push_back(strdup(""));
@@ -37,6 +40,7 @@ paddle_error paddle_init(int argc, char** argv) {
   }
   initPaddle(argc + 1, realArgv.data());
   free(realArgv[0]);
+  isInit = true;
   return kPD_NO_ERROR;
 }
 }
diff --git a/paddle/scripts/submit_local.sh.in b/paddle/scripts/submit_local.sh.in
index 5c4b5a2495182ea5d2b3341cff650dfb4d8b0c0f..b9a49526a7e02131767a4e9b26cd0b53278176d0 100755
--- a/paddle/scripts/submit_local.sh.in
+++ b/paddle/scripts/submit_local.sh.in
@@ -43,6 +43,54 @@ function ver2num() {
   set +e
 }
 
+function cpu_config() {
+    # auto-set KMP_AFFINITY and OMP_DYNAMIC based on the Hyper-Threading status,
+    # only when MKLDNN or MKLML is enabled
+    if [ "@WITH_MKLDNN@" == "OFF" ] && [ "@WITH_MKLML@" == "OFF" ]; then
+        return 0
+    fi
+    ht=`lscpu |grep "per core"|awk -F':' '{print $2}'|xargs`
+    if [ $ht -eq 1 ]; then # HT is OFF
+        if [ -z "$KMP_AFFINITY" ]; then
+            export KMP_AFFINITY="granularity=fine,compact,0,0"
+        fi
+        if [ -z "$OMP_DYNAMIC" ]; then
+            export OMP_DYNAMIC="FALSE"
+        fi
+    else # HT is ON
+        if [ -z "$KMP_AFFINITY" ]; then
+            export KMP_AFFINITY="granularity=fine,compact,1,0"
+        fi
+        if [ -z "$OMP_DYNAMIC" ]; then
+            export OMP_DYNAMIC="True"
+        fi
+    fi
+}
+
+function threads_config() {
+    # auto-set OMP_NUM_THREADS and MKL_NUM_THREADS
+    # according to trainer_count and the total number of processors,
+    # only when MKLDNN or MKLML is enabled
+    if [ "@WITH_MKLDNN@" == "OFF" ] && [ "@WITH_MKLML@" == "OFF" ]; then
+        return 0
+    fi
+    processors=`grep "processor" /proc/cpuinfo|sort -u|wc -l`
+    trainers=`grep -Eo 'trainer_count.[0-9]+' <<< "$@" |grep -Eo '[0-9]+'|xargs`
+    if [ -z "$trainers" ]; then
+        trainers=1
+    fi
+    threads=$((processors / trainers))
+    if [ $threads -eq 0 ]; then
+        threads=1
+    fi
+    if [ -z "$OMP_NUM_THREADS" ]; then
+        export OMP_NUM_THREADS=$threads
+    fi
+    if [ -z "$MKL_NUM_THREADS" ]; then
+        export MKL_NUM_THREADS=$threads
+    fi
+}
+
 PADDLE_CONF_HOME="$HOME/.config/paddle"
 mkdir -p ${PADDLE_CONF_HOME}
 
@@ -92,9 +140,13 @@
     sys.exit(0)
 EOF
 
+cpu_config
+# echo $KMP_AFFINITY $OMP_DYNAMIC
 case "$1" in
     "train")
+        threads_config $@
+        # echo $OMP_NUM_THREADS $MKL_NUM_THREADS
         ${DEBUGGER} $PADDLE_BIN_PATH/paddle_trainer ${@:2}
         ;;
     "merge_model")
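Note: `cpu_config` and `threads_config` only fill in variables the user has not already exported, and they split the logical processors evenly across trainers. A minimal Python sketch of that fallback rule follows (hypothetical helper names; the shell functions above are the actual implementation):

    import os

    def compute_threads(processors, trainer_count):
        # one equal share of logical CPUs per trainer, but never fewer than one
        return max(1, processors // max(1, trainer_count))

    def set_env_if_unset(key, value):
        # mirror the shell [ -z "$VAR" ] guard: user-exported values win
        if os.environ.get(key) is None:
            os.environ[key] = value

    # e.g. 16 logical processors shared by 4 trainers -> 4 threads per trainer
    threads = compute_threads(16, 4)
    set_env_if_unset("OMP_NUM_THREADS", str(threads))
    set_env_if_unset("MKL_NUM_THREADS", str(threads))

The same logic is duplicated in the Python API below, so `paddle.init` behaves consistently with the launcher script.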
diff --git a/python/paddle/v2/__init__.py b/python/paddle/v2/__init__.py
index 43df089363782b89524138b15fb4d8ea37abbca9..7bbe3eaaa67a117bc53571e6571365c3a26814c1 100644
--- a/python/paddle/v2/__init__.py
+++ b/python/paddle/v2/__init__.py
@@ -76,6 +76,31 @@ def init(**kwargs):
     for key in args_dict.keys():
         args.append('--%s=%s' % (key, str(args_dict[key])))
 
+    # auto set cpu environment
+    def set_env(key, value):
+        '''If the key has not been set in the environment, set it with value.'''
+        assert isinstance(key, str)
+        assert isinstance(value, str)
+        envset = os.environ.get(key)
+        if envset is None:
+            os.environ[key] = value
+
+    ht = os.popen("lscpu |grep \"per core\"|awk -F':' '{print $2}'|xargs")
+    ht = int(ht.read())
+    if ht == 1:  # ht is off
+        set_env("OMP_DYNAMIC", "false")
+        set_env("KMP_AFFINITY", "granularity=fine,compact,0,0")
+    else:
+        set_env("OMP_DYNAMIC", "true")
+        set_env("KMP_AFFINITY", "granularity=fine,compact,1,0")
+    processors = os.popen("grep \"processor\" /proc/cpuinfo|sort -u|wc -l")
+    processors = int(processors.read())
+    trainers = kwargs.get('trainer_count', 1)
+    threads = processors / trainers
+    threads = '1' if threads < 1 else str(threads)
+    set_env("OMP_NUM_THREADS", threads)
+    set_env("MKL_NUM_THREADS", threads)
+
     if 'use_gpu' in kwargs:
         cp.g_command_config_args['use_gpu'] = kwargs['use_gpu']
     if 'use_mkldnn' in kwargs:
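With the `python/paddle/v2/__init__.py` change, these defaults are applied transparently whenever `paddle.init` runs. A usage sketch (assumes a Linux host, since the probes shell out to `lscpu` and `/proc/cpuinfo`, and that no OpenMP/MKL variables were exported beforehand):

    import os
    import paddle.v2 as paddle

    # init() probes the CPU and fills in any unset OpenMP/MKL variables;
    # values exported before this call are left untouched.
    paddle.init(use_gpu=False, trainer_count=2)

    print(os.environ["OMP_NUM_THREADS"])  # e.g. "4" on an 8-processor host
    print(os.environ["KMP_AFFINITY"])     # depends on the Hyper-Threading state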