diff --git a/benchmark/paddle/image/googlenet.py b/benchmark/paddle/image/googlenet.py index a88ecac67d9e677f14f6dc24ba9a337b1245243f..7059c13bd2c2b98eb3fbcf633a6f7064e54d5402 100644 --- a/benchmark/paddle/image/googlenet.py +++ b/benchmark/paddle/image/googlenet.py @@ -6,10 +6,21 @@ width = 224 num_class = 1000 batch_size = get_config_arg('batch_size', int, 128) use_gpu = get_config_arg('use_gpu', bool, True) - -args = {'height': height, 'width': width, 'color': True, 'num_class': num_class} +is_infer = get_config_arg("is_infer", bool, False) + +args = { + 'height': height, + 'width': width, + 'color': True, + 'num_class': num_class, + 'is_infer': is_infer +} define_py_data_sources2( - "train.list", None, module="provider", obj="process", args=args) + "train.list" if not is_infer else None, + "test.list" if is_infer else None, + module="provider", + obj="process", + args=args) settings( batch_size=batch_size, @@ -146,7 +157,6 @@ def inception(name, input, channels, \ return cat -lab = data_layer(name="label", size=1000) data = data_layer(name="input", size=3 * height * width) # stage 1 @@ -224,6 +234,10 @@ pool5 = img_pool_layer( dropout = dropout_layer(name="dropout", input=pool5, dropout_rate=0.4) out3 = fc_layer( name="output3", input=dropout, size=1000, act=SoftmaxActivation()) -loss3 = cross_entropy(name='loss3', input=out3, label=lab) -outputs(loss3) +if is_infer: + outputs(out3) +else: + lab = data_layer(name="label", size=num_class) + loss3 = cross_entropy(name='loss3', input=out3, label=lab) + outputs(loss3) diff --git a/benchmark/paddle/image/provider.py b/benchmark/paddle/image/provider.py index 4703944c8722552d56ba80a8e0663de5fb4df53d..927b1759941f362ef4b5ffe84dd01332986d9306 100644 --- a/benchmark/paddle/image/provider.py +++ b/benchmark/paddle/image/provider.py @@ -13,14 +13,20 @@ def initHook(settings, height, width, color, num_class, **kwargs): settings.data_size = settings.height * settings.width * 3 else: settings.data_size = settings.height * settings.width - - settings.slots = [dense_vector(settings.data_size), integer_value(1)] + settings.is_infer = kwargs.get('is_infer', False) + if settings.is_infer: + settings.slots = [dense_vector(settings.data_size)] + else: + settings.slots = [dense_vector(settings.data_size), integer_value(1)] @provider( init_hook=initHook, min_pool_size=-1, cache=CacheType.CACHE_PASS_IN_MEM) def process(settings, file_list): - for i in xrange(1024): + for i in xrange(2560 if settings.is_infer else 1024): img = np.random.rand(1, settings.data_size).reshape(-1, 1).flatten() - lab = random.randint(0, settings.num_class - 1) - yield img.astype('float32'), int(lab) + if settings.is_infer: + yield img.astype('float32') + else: + lab = random.randint(0, settings.num_class - 1) + yield img.astype('float32'), int(lab) diff --git a/benchmark/paddle/image/resnet.py b/benchmark/paddle/image/resnet.py index 6ae1857642e8df4b3859eec68a3a5227d1c4fcb3..4a14363ff1db48a5072cbb5f5eb3bc9241ffca8f 100644 --- a/benchmark/paddle/image/resnet.py +++ b/benchmark/paddle/image/resnet.py @@ -6,11 +6,21 @@ width = 224 num_class = 1000 batch_size = get_config_arg('batch_size', int, 64) layer_num = get_config_arg("layer_num", int, 50) -is_test = get_config_arg("is_test", bool, False) - -args = {'height': height, 'width': width, 'color': True, 'num_class': num_class} +is_infer = get_config_arg("is_infer", bool, False) + +args = { + 'height': height, + 'width': width, + 'color': True, + 'num_class': num_class, + 'is_infer': is_infer +} define_py_data_sources2( - "train.list", None, module="provider", obj="process", args=args) + "train.list" if not is_infer else None, + "test.list" if is_infer else None, + module="provider", + obj="process", + args=args) settings( batch_size=batch_size, @@ -45,7 +55,10 @@ def conv_bn_layer(name, act=LinearActivation(), bias_attr=False) return batch_norm_layer( - name=name + "_bn", input=tmp, act=active_type, use_global_stats=is_test) + name=name + "_bn", + input=tmp, + act=active_type, + use_global_stats=is_infer) def bottleneck_block(name, input, num_filters1, num_filters2): @@ -207,7 +220,9 @@ elif layer_num == 152: else: print("Wrong layer number.") -lbl = data_layer(name="label", size=num_class) -loss = cross_entropy(name='loss', input=resnet, label=lbl) -inputs(img, lbl) -outputs(loss) +if is_infer: + outputs(resnet) +else: + lbl = data_layer(name="label", size=num_class) + loss = cross_entropy(name='loss', input=resnet, label=lbl) + outputs(loss) diff --git a/benchmark/paddle/image/run_mkldnn_infer.sh b/benchmark/paddle/image/run_mkldnn_infer.sh new file mode 100755 index 0000000000000000000000000000000000000000..03a76c0540092501b33e1fdd430ae4e754744fd0 --- /dev/null +++ b/benchmark/paddle/image/run_mkldnn_infer.sh @@ -0,0 +1,86 @@ +set -e + +function clock_to_seconds() { + hours=`echo $1 | awk -F ':' '{print $1}'` + mins=`echo $1 | awk -F ':' '{print $2}'` + secs=`echo $1 | awk -F ':' '{print $3}'` + echo `bc -l <<< "$secs + $mins * 60 + $hours * 3600"` +} + +function infer() { + unset OMP_NUM_THREADS MKL_NUM_THREADS OMP_DYNAMIC KMP_AFFINITY + topology=$1 + layer_num=$2 + bs=$3 + use_mkldnn=$4 + if [ $4 == "True" ]; then + thread=1 + log="logs/infer-${topology}-${layer_num}-mkldnn-${bs}.log" + elif [ $4 == "False" ]; then + thread=`nproc` + if [ $thread -gt $bs ]; then + thread=$bs + fi + log="logs/infer-${topology}-${layer_num}-${thread}mklml-${bs}.log" + else + echo "Wrong input $4, use True or False." + exit 0 + fi + + models_in="models/${topology}-${layer_num}/pass-00000/" + if [ ! -d $models_in ]; then + echo "Training model ${topology}_${layer_num}" + paddle train --job=train \ + --config="${topology}.py" \ + --use_mkldnn=True \ + --use_gpu=False \ + --trainer_count=1 \ + --num_passes=1 \ + --save_dir="models/${topology}-${layer_num}" \ + --config_args="batch_size=128,layer_num=${layer_num}" \ + > /dev/null 2>&1 + echo "Done" + fi + log_period=$((256 / bs)) + paddle train --job=test \ + --config="${topology}.py" \ + --use_mkldnn=$use_mkldnn \ + --use_gpu=False \ + --trainer_count=$thread \ + --log_period=$log_period \ + --config_args="batch_size=${bs},layer_num=${layer_num},is_infer=True" \ + --init_model_path=$models_in \ + 2>&1 | tee ${log} + + # calculate the last 5 logs period time of 1280 samples, + # the time before are burning time. + start=`tail ${log} -n 7 | head -n 1 | awk -F ' ' '{print $2}' | xargs` + end=`tail ${log} -n 2 | head -n 1 | awk -F ' ' '{print $2}' | xargs` + start_sec=`clock_to_seconds $start` + end_sec=`clock_to_seconds $end` + fps=`bc <<< "scale = 2; 1280 / ($end_sec - $start_sec)"` + echo "Last 1280 samples start: ${start}(${start_sec} sec), end: ${end}(${end_sec} sec;" >> ${log} + echo "FPS: $fps images/sec" >> ${log} +} + +if [ ! -f "train.list" ]; then + echo " " > train.list +fi +if [ ! -f "test.list" ]; then + echo " " > test.list +fi +if [ ! -d "logs" ]; then + mkdir logs +fi +if [ ! -d "models" ]; then + mkdir -p models +fi + +# inference benchmark +for use_mkldnn in True False; do + for batchsize in 1 2 4 8 16; do + infer googlenet v1 $batchsize $use_mkldnn + infer resnet 50 $batchsize $use_mkldnn + infer vgg 19 $batchsize $use_mkldnn + done +done diff --git a/benchmark/paddle/image/run_mkldnn.sh b/benchmark/paddle/image/run_mkldnn_train.sh similarity index 79% rename from benchmark/paddle/image/run_mkldnn.sh rename to benchmark/paddle/image/run_mkldnn_train.sh index f768f6c29a84b40f917e0ccfde4d8c15f65c818b..320206239ae960bd088b05d3b10934a98da741b1 100755 --- a/benchmark/paddle/image/run_mkldnn.sh +++ b/benchmark/paddle/image/run_mkldnn_train.sh @@ -8,13 +8,13 @@ function train() { use_mkldnn=$4 if [ $4 == "True" ]; then thread=1 - log="logs/${topology}-${layer_num}-mkldnn-${bs}.log" + log="logs/train-${topology}-${layer_num}-mkldnn-${bs}.log" elif [ $4 == "False" ]; then thread=`nproc` # each trainer_count use only 1 core to avoid conflict - log="logs/${topology}-${layer_num}-${thread}mklml-${bs}.log" + log="logs/train-${topology}-${layer_num}-${thread}mklml-${bs}.log" else - echo "Wrong input $3, use True or False." + echo "Wrong input $4, use True or False." exit 0 fi args="batch_size=${bs},layer_num=${layer_num}" @@ -30,13 +30,14 @@ function train() { 2>&1 | tee ${log} } -if [ ! -d "train.list" ]; then +if [ ! -f "train.list" ]; then echo " " > train.list fi if [ ! -d "logs" ]; then mkdir logs fi +# training benchmark for use_mkldnn in True False; do for batchsize in 64 128 256; do train vgg 19 $batchsize $use_mkldnn diff --git a/benchmark/paddle/image/vgg.py b/benchmark/paddle/image/vgg.py index 420884ed8e1ae36a3f1772bfbe8323f3d0ea71e6..8d0a1e97a451cd52ef17e4e326673cc90059ef3c 100644 --- a/benchmark/paddle/image/vgg.py +++ b/benchmark/paddle/image/vgg.py @@ -6,10 +6,21 @@ width = 224 num_class = 1000 batch_size = get_config_arg('batch_size', int, 64) layer_num = get_config_arg('layer_num', int, 19) +is_infer = get_config_arg("is_infer", bool, False) -args = {'height': height, 'width': width, 'color': True, 'num_class': num_class} +args = { + 'height': height, + 'width': width, + 'color': True, + 'num_class': num_class, + 'is_infer': is_infer +} define_py_data_sources2( - "train.list", None, module="provider", obj="process", args=args) + "train.list" if not is_infer else None, + "test.list" if is_infer else None, + module="provider", + obj="process", + args=args) settings( batch_size=batch_size, @@ -98,6 +109,9 @@ elif layer_num == 19: else: print("Wrong layer number.") -lab = data_layer('label', num_class) -loss = cross_entropy(input=vgg, label=lab) -outputs(loss) +if is_infer: + outputs(vgg) +else: + lab = data_layer('label', num_class) + loss = cross_entropy(input=vgg, label=lab) + outputs(loss)