diff --git a/benchmark/IntelOptimizedPaddle.md b/benchmark/IntelOptimizedPaddle.md index 084d3237d9cfe9ca4837f77cf5f70a2449cfcc03..8b7dc5b7db800896eb4de2054ab5e584aed93999 100644 --- a/benchmark/IntelOptimizedPaddle.md +++ b/benchmark/IntelOptimizedPaddle.md @@ -7,11 +7,11 @@ Machine: System: CentOS release 6.3 (Final), Docker 1.12.1. -PaddlePaddle: (TODO: will rerun after 0.11.0) -- paddlepaddle/paddle:latest (for MKLML and MKL-DNN) +PaddlePaddle: +- paddlepaddle/paddle:0.11.0 (for MKLML and MKL-DNN) - MKL-DNN tag v0.11 - MKLML 2018.0.1.20171007 -- paddlepaddle/paddle:latest-openblas (for OpenBLAS) +- paddlepaddle/paddle:0.11.0-openblas (for OpenBLAS) - OpenBLAS v0.2.20 On each machine, we will test and compare the performance of training on single node using MKL-DNN / MKLML / OpenBLAS respectively. @@ -56,15 +56,15 @@ Input image size - 3 * 224 * 224, Time: images/second -- Alexnet +- AlexNet | BatchSize | 64 | 128 | 256 | |--------------|--------| ------ | -------| -| OpenBLAS | 2.13 | 2.45 | 2.68 | +| OpenBLAS | 45.62 | 72.79 | 107.22 | | MKLML | 66.37 | 105.60 | 144.04 | | MKL-DNN | 399.00 | 498.94 | 626.53 | -chart TBD + #### Inference Test on batch size 1, 2, 4, 8, 16 on Intel(R) Xeon(R) Gold 6148 CPU @ 2.40GHz @@ -72,36 +72,41 @@ Test on batch size 1, 2, 4, 8, 16 on Intel(R) Xeon(R) Gold 6148 CPU @ 2.40GHz | BatchSize | 1 | 2 | 4 | 8 | 16 | |-----------|-------|-------|-------|-------|-------| -| OpenBLAS | 1.07 | 1.08 | 1.06 | 0.88 | 0.65 | +| OpenBLAS | 1.10 | 1.96 | 3.62 | 3.63 | 2.25 | | MKLML | 5.58 | 9.80 | 15.15 | 21.21 | 28.67 | | MKL-DNN | 75.07 | 88.64 | 82.58 | 92.29 | 96.75 | + + - ResNet-50 | BatchSize | 1 | 2 | 4 | 8 | 16 | |-----------|-------|--------|--------|--------|--------| -| OpenBLAS | 3.35 | 3.19 | 3.09 | 2.55 | 1.96 | +| OpenBLAS | 3.31 | 6.72 | 11.59 | 13.17 | 9.27 | | MKLML | 6.33 | 12.02 | 22.88 | 40.53 | 63.09 | | MKL-DNN | 107.83| 148.84 | 177.78 | 189.35 | 217.69 | + - GoogLeNet | BatchSize | 1 | 2 | 4 | 8 | 16 | |-----------|--------|--------|--------|--------|--------| -| OpenBLAS | 12.04 | 11.31 | 10.00 | 9.07 | 4.34 | +| OpenBLAS | 12.06 | 23.56 | 34.48 | 36.45 | 23.12 | | MKLML | 22.74 | 41.56 | 81.22 | 133.47 | 210.53 | | MKL-DNN | 175.10 | 272.92 | 450.70 | 512.00 | 600.94 | -- Alexnet + + +- AlexNet | BatchSize | 1 | 2 | 4 | 8 | 16 | |-----------|--------|--------|--------|--------|--------| -| OpenBLAS | | | | | | +| OpenBLAS | 3.53 | 6.23 | 15.04 | 26.06 | 31.62 | | MKLML | 21.32 | 36.55 | 73.06 | 131.15 | 192.77 | | MKL-DNN | 442.91 | 656.41 | 719.10 | 847.68 | 850.51 | -chart TBD + ### Laptop TBD diff --git a/benchmark/figs/alexnet-cpu-infer.png b/benchmark/figs/alexnet-cpu-infer.png new file mode 100644 index 0000000000000000000000000000000000000000..6215ae4e4288f969a909c258ddd5b5f51e6abb3f Binary files /dev/null and b/benchmark/figs/alexnet-cpu-infer.png differ diff --git a/benchmark/figs/alexnet-cpu-train.png b/benchmark/figs/alexnet-cpu-train.png new file mode 100644 index 0000000000000000000000000000000000000000..b3200bbc049a9d75857fb5692902d7b475aa8f68 Binary files /dev/null and b/benchmark/figs/alexnet-cpu-train.png differ diff --git a/benchmark/figs/googlenet-cpu-infer.png b/benchmark/figs/googlenet-cpu-infer.png new file mode 100644 index 0000000000000000000000000000000000000000..19478d433bae651f4506153ded11a96d5137b409 Binary files /dev/null and b/benchmark/figs/googlenet-cpu-infer.png differ diff --git a/benchmark/figs/googlenet-cpu-train.png b/benchmark/figs/googlenet-cpu-train.png index c3f67faf096fe9b45dd815f294b41679dc7c9e54..4e86e058d0654d02c898bf7f5fe73aa1c7614e20 100644 Binary files a/benchmark/figs/googlenet-cpu-train.png and b/benchmark/figs/googlenet-cpu-train.png differ diff --git a/benchmark/figs/resnet-cpu-infer.png b/benchmark/figs/resnet-cpu-infer.png new file mode 100644 index 0000000000000000000000000000000000000000..bc43d4b8d20c600d6f1046a5986a6c62adfa6b44 Binary files /dev/null and b/benchmark/figs/resnet-cpu-infer.png differ diff --git a/benchmark/figs/resnet-cpu-train.png b/benchmark/figs/resnet-cpu-train.png index b96ecd5ff940c0d000613b1ed1f11fb16796cf47..96746b1759fa17d25ac5f40ed3678e16086364ba 100644 Binary files a/benchmark/figs/resnet-cpu-train.png and b/benchmark/figs/resnet-cpu-train.png differ diff --git a/benchmark/figs/vgg-cpu-infer.png b/benchmark/figs/vgg-cpu-infer.png new file mode 100644 index 0000000000000000000000000000000000000000..3a51ec6c474f0e0f0c4384c8ccd1e08c4382230b Binary files /dev/null and b/benchmark/figs/vgg-cpu-infer.png differ diff --git a/benchmark/figs/vgg-cpu-train.png b/benchmark/figs/vgg-cpu-train.png index f830ca6a87d10b72a5113636dd5686ab25a2e864..6d548cfd59f86f8166c011d71ebde4e4b33ef644 100644 Binary files a/benchmark/figs/vgg-cpu-train.png and b/benchmark/figs/vgg-cpu-train.png differ diff --git a/benchmark/paddle/image/run_openblas_infer.sh b/benchmark/paddle/image/run_openblas_infer.sh index 71a49231a5527ebee9f45d5f4650ce2a4f6a1c31..a9a7b8a66717c4be0543c3fe2db293fe199e3dc4 100755 --- a/benchmark/paddle/image/run_openblas_infer.sh +++ b/benchmark/paddle/image/run_openblas_infer.sh @@ -8,6 +8,7 @@ function clock_to_seconds() { } function infer() { + export OPENBLAS_MAIN_FREE=1 topology=$1 layer_num=$2 bs=$3