提交 7bb26805 编写于 作者: W wanghaox

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into sub_sequence_op

set -e set -e
function train() { function train() {
unset OMP_NUM_THREADS MKL_NUM_THREADS unset OMP_NUM_THREADS MKL_NUM_THREADS OMP_DYNAMIC KMP_AFFINITY
export OMP_DYNAMIC="FALSE"
export KMP_AFFINITY="granularity=fine,compact,0,0"
topology=$1 topology=$1
layer_num=$2 layer_num=$2
bs=$3 bs=$3
...@@ -14,8 +12,6 @@ function train() { ...@@ -14,8 +12,6 @@ function train() {
elif [ $4 == "False" ]; then elif [ $4 == "False" ]; then
thread=`nproc` thread=`nproc`
# each trainer_count use only 1 core to avoid conflict # each trainer_count use only 1 core to avoid conflict
export OMP_NUM_THREADS=1
export MKL_NUM_THREADS=1
log="logs/${topology}-${layer_num}-${thread}mklml-${bs}.log" log="logs/${topology}-${layer_num}-${thread}mklml-${bs}.log"
else else
echo "Wrong input $3, use True or False." echo "Wrong input $3, use True or False."
......
...@@ -29,6 +29,9 @@ static void initPaddle(int argc, char** argv) { ...@@ -29,6 +29,9 @@ static void initPaddle(int argc, char** argv) {
extern "C" { extern "C" {
paddle_error paddle_init(int argc, char** argv) { paddle_error paddle_init(int argc, char** argv) {
static bool isInit = false;
if (isInit) return kPD_NO_ERROR;
std::vector<char*> realArgv; std::vector<char*> realArgv;
realArgv.reserve(argc + 1); realArgv.reserve(argc + 1);
realArgv.push_back(strdup("")); realArgv.push_back(strdup(""));
...@@ -37,6 +40,7 @@ paddle_error paddle_init(int argc, char** argv) { ...@@ -37,6 +40,7 @@ paddle_error paddle_init(int argc, char** argv) {
} }
initPaddle(argc + 1, realArgv.data()); initPaddle(argc + 1, realArgv.data());
free(realArgv[0]); free(realArgv[0]);
isInit = true;
return kPD_NO_ERROR; return kPD_NO_ERROR;
} }
} }
...@@ -43,6 +43,54 @@ function ver2num() { ...@@ -43,6 +43,54 @@ function ver2num() {
set +e set +e
} }
function cpu_config() {
  # Export default KMP_AFFINITY and OMP_DYNAMIC values chosen from the
  # Hyper-Threading status reported by `lscpu`. A variable that the caller
  # has already set (non-empty) is never overwritten.
  #
  # Does nothing when both @WITH_MKLDNN@ and @WITH_MKLML@ are "OFF"
  # (NOTE(review): these look like build-time template placeholders --
  # confirm they are substituted before this script is installed).
  #
  # The closing `]` must be its own word; without the space before it the
  # test aborts with "missing `]'" and the early return is never taken.
  if [ "@WITH_MKLDNN@" == "OFF" ] && [ "@WITH_MKLML@" == "OFF" ]; then
    return 0
  fi

  # "Thread(s) per core" is 1 when Hyper-Threading is off. stderr is dropped
  # so that a missing lscpu leaves $ht empty instead of printing an error.
  local ht
  ht=$(lscpu 2>/dev/null | grep "per core" | awk -F':' '{print $2}' | xargs)

  # String comparison on a quoted value: an empty or unexpected $ht simply
  # selects the "HT is ON" defaults instead of making `[` fail.
  if [ "$ht" == "1" ]; then # HT is OFF
    if [ -z "$KMP_AFFINITY" ]; then
      export KMP_AFFINITY="granularity=fine,compact,0,0"
    fi
    if [ -z "$OMP_DYNAMIC" ]; then
      export OMP_DYNAMIC="FALSE"
    fi
  else # HT is ON (or its status could not be determined)
    if [ -z "$KMP_AFFINITY" ]; then
      export KMP_AFFINITY="granularity=fine,compact,1,0"
    fi
    if [ -z "$OMP_DYNAMIC" ]; then
      export OMP_DYNAMIC="True"
    fi
  fi
}
function threads_config() {
  # Export default OMP_NUM_THREADS and MKL_NUM_THREADS computed as
  # (number of processors) / (trainer_count found in the arguments),
  # with a minimum of 1. A variable that the caller has already set
  # (non-empty) is never overwritten.
  #
  # Arguments: the launcher's command line; it is scanned for
  #            "trainer_count<sep><digits>".
  #
  # Does nothing when both @WITH_MKLDNN@ and @WITH_MKLML@ are "OFF"
  # (NOTE(review): these look like build-time template placeholders --
  # confirm they are substituted before this script is installed).
  #
  # The closing `]` must be its own word; without the space before it the
  # test aborts with "missing `]'" and the early return is never taken.
  if [ "@WITH_MKLDNN@" == "OFF" ] && [ "@WITH_MKLML@" == "OFF" ]; then
    return 0
  fi

  local processors trainers threads
  # A missing /proc/cpuinfo yields a count of 0, handled by the clamp below.
  processors=$(grep "processor" /proc/cpuinfo 2>/dev/null | sort -u | wc -l)

  # If the flag appears more than once, the last occurrence is used so that
  # $trainers is always a single number (several numbers would break both
  # the emptiness test and the arithmetic below).
  trainers=$(grep -Eo 'trainer_count.[0-9]+' <<< "$@" | grep -Eo '[0-9]+' | tail -n 1)
  if [ -z "$trainers" ]; then
    trainers=1
  fi
  # Force base 10 (a leading zero would otherwise be parsed as octal) and
  # avoid a division by zero.
  trainers=$((10#$trainers))
  if [ "$trainers" -lt 1 ]; then
    trainers=1
  fi

  threads=$((processors / trainers))
  if [ "$threads" -eq 0 ]; then
    threads=1
  fi
  if [ -z "$OMP_NUM_THREADS" ]; then
    export OMP_NUM_THREADS=$threads
  fi
  if [ -z "$MKL_NUM_THREADS" ]; then
    export MKL_NUM_THREADS=$threads
  fi
}
PADDLE_CONF_HOME="$HOME/.config/paddle" PADDLE_CONF_HOME="$HOME/.config/paddle"
mkdir -p ${PADDLE_CONF_HOME} mkdir -p ${PADDLE_CONF_HOME}
...@@ -92,9 +140,13 @@ else: ...@@ -92,9 +140,13 @@ else:
sys.exit(0) sys.exit(0)
EOF EOF
cpu_config
# echo $KMP_AFFINITY $OMP_DYNAMIC
case "$1" in case "$1" in
"train") "train")
threads_config $@
# echo $OMP_NUM_THREADS $MKL_NUM_THREADS
${DEBUGGER} $PADDLE_BIN_PATH/paddle_trainer ${@:2} ${DEBUGGER} $PADDLE_BIN_PATH/paddle_trainer ${@:2}
;; ;;
"merge_model") "merge_model")
......
...@@ -76,6 +76,31 @@ def init(**kwargs): ...@@ -76,6 +76,31 @@ def init(**kwargs):
for key in args_dict.keys(): for key in args_dict.keys():
args.append('--%s=%s' % (key, str(args_dict[key]))) args.append('--%s=%s' % (key, str(args_dict[key])))
# auto set cpu environment
def set_env(key, value):
    '''Give the environment variable `key` the value `value`, but only
    when `key` is not already present in the process environment.

    Both arguments must be `str`; anything else trips an assertion.
    '''
    for argument in (key, value):
        assert isinstance(argument, str)
    # setdefault() stores the value only for a missing key, so a setting
    # that already exists is left untouched.
    os.environ.setdefault(key, value)
# Read "Thread(s) per core" from lscpu; a value of 1 means Hyper-Threading
# is off. When lscpu is unavailable the pipe yields no number, so the
# status is treated as unknown (0) and the "HT on" defaults below are used
# instead of raising ValueError.
ht_pipe = os.popen("lscpu |grep \"per core\"|awk -F':' '{print $2}'|xargs")
try:
    ht = int(ht_pipe.read())
except ValueError:
    ht = 0
finally:
    ht_pipe.close()
if ht == 1:  # ht is off
    set_env("OMP_DYNAMIC", "false")
    set_env("KMP_AFFINITY", "granularity=fine,compact,0,0")
else:
    set_env("OMP_DYNAMIC", "true")
    set_env("KMP_AFFINITY", "granularity=fine,compact,1,0")

# Count processor entries; `wc -l` always prints a number, so this parses
# even when /proc/cpuinfo does not exist (the count is then 0).
cpu_pipe = os.popen("grep \"processor\" /proc/cpuinfo|sort -u|wc -l")
try:
    processors = int(cpu_pipe.read())
finally:
    cpu_pipe.close()

trainers = kwargs.get('trainer_count', 1)
# Floor division keeps the result an integer on both Python 2 and 3; true
# division would produce a value such as "4.0" for the thread-count
# variables. A non-positive trainer count falls back to one thread rather
# than dividing by zero.
threads = processors // trainers if trainers > 0 else 1
threads = '1' if threads < 1 else str(threads)
set_env("OMP_NUM_THREADS", threads)
set_env("MKL_NUM_THREADS", threads)
if 'use_gpu' in kwargs: if 'use_gpu' in kwargs:
cp.g_command_config_args['use_gpu'] = kwargs['use_gpu'] cp.g_command_config_args['use_gpu'] = kwargs['use_gpu']
if 'use_mkldnn' in kwargs: if 'use_mkldnn' in kwargs:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册