From 63250a65de59a5d71d38d4d711ef8de766028460 Mon Sep 17 00:00:00 2001
From: zhaojiaying01
Date: Mon, 13 Aug 2018 21:05:05 +0800
Subject: [PATCH] update unit test and performance data of squeezenet and yolo

---
 README.md                               |  6 ++++++
 src/operators/fusion_conv_add_relu_op.h | 12 +++++++-----
 test/common/test_gemm_perf.cpp          |  3 +++
 test/net/test_resnet.cpp                | 16 +++++++++++-----
 test/net/test_squeezenet.cpp            | 16 +++++++++++-----
 test/net/test_yolo.cpp                  | 16 +++++++++++-----
 6 files changed, 49 insertions(+), 20 deletions(-)

diff --git a/README.md b/README.md
index c29165d572..825c417c5f 100644
--- a/README.md
+++ b/README.md
@@ -35,6 +35,12 @@ Paddle-Mobile is a project under the PaddlePaddle organization, a deep learning framework for embedded platforms
 |||||
 |googlenet(v1) arm v7|1 thread|2 threads|4 threads|
 |Kirin 960 (ms)|348.018|242.689|169.998|
+|||||
+|squeezenet arm v7|1 thread|2 threads|4 threads|
+|Kirin 960 (ms)|84.685|56.544|38.833|
+|||||
+|yolo arm v7|1 thread|2 threads|4 threads|
+|Kirin 960 (ms)|131.831|88.990|60.905|
 
 The ARM CPU is paddle-mobile's primary target, and the CPU's generality has always been its advantage. Embedded deep learning requires a large amount of hand-written CPU assembly, and we are coding at full speed to squeeze every bit of acceleration out of the hardware.
 ARM CPU optimization is still in progress; only conventional CPU optimizations are applied so far. On an ARM A73, paddle-mobile arm-v7 currently runs MobileNet 1.0 once in 110+ ms on a single core. This is clearly not our final goal: we are rewriting large parts in assembly, so there is still huge room for further gains. Only armv7 is supported at the moment; armv8 support will be added in the future.
diff --git a/src/operators/fusion_conv_add_relu_op.h b/src/operators/fusion_conv_add_relu_op.h
index cda97ba1a3..e8a9498819 100644
--- a/src/operators/fusion_conv_add_relu_op.h
+++ b/src/operators/fusion_conv_add_relu_op.h
@@ -16,6 +16,8 @@ limitations under the License. */
 
 #pragma once
 
+#include <string>
+#include <vector>
 #include "framework/operator.h"
 #include "framework/program/program-optimize/fusion_op_register.h"
 #include "operators/kernel/conv_add_relu_kernel.h"
@@ -65,11 +67,11 @@ class FusionConvAddReluOp : public framework::OperatorWithKernel<
 #ifdef PADDLE_MOBILE_CPU
 
-//#ifndef CONV_ADD_RELU_REGISTER
-//#define CONV_ADD_RELU_REGISTER
-// static framework::FusionOpRegistrar fusion_conv_add_relu_registrar(new
-// FusionConvAddReluOpMatcher());
-//#endif
+#ifndef CONV_ADD_RELU_REGISTER
+#define CONV_ADD_RELU_REGISTER
+static framework::FusionOpRegistrar fusion_conv_add_relu_registrar(
+    new FusionConvAddReluOpMatcher());
+#endif
 
 #endif
 
 #ifdef PADDLE_MOBILE_MALI_GPU
diff --git a/test/common/test_gemm_perf.cpp b/test/common/test_gemm_perf.cpp
index c505c61fce..386c09d71a 100644
--- a/test/common/test_gemm_perf.cpp
+++ b/test/common/test_gemm_perf.cpp
@@ -14,6 +14,7 @@ limitations under the License. */
 
 #include <iostream>
 #include "../test_helper.h"
+#include "../test_include.h"
 #include "operators/math/gemm.h"
 #include "operators/math/math_function.h"
 
@@ -26,6 +27,8 @@ limitations under the License. */
 #define k 1024
 
 int main() {
+  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+  paddle_mobile.SetThreadNum(4);
   Tensor aa, bb, cc, scale, bias;
   auto aaptr = aa.mutable_data<float>({m, k});
   auto bbptr = bb.mutable_data<float>({k, n});
diff --git a/test/net/test_resnet.cpp b/test/net/test_resnet.cpp
index 883ad95392..73ac88ef77 100644
--- a/test/net/test_resnet.cpp
+++ b/test/net/test_resnet.cpp
@@ -12,16 +12,17 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include <fstream>
+#include <iostream>
 #include "../test_helper.h"
 #include "../test_include.h"
 
 int main() {
   paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+  paddle_mobile.SetThreadNum(4);
   auto time1 = time();
-  if (paddle_mobile.Load(g_resnet, false)) {
+  if (paddle_mobile.Load(g_resnet, true)) {
     auto time2 = time();
-    DLOG << "load cost :" << time_diff(time1, time1) << "ms";
+    std::cout << "load cost :" << time_diff(time1, time2) << "ms" << std::endl;
     std::vector<int64_t> dims{1, 3, 32, 32};
     Tensor input_tensor;
     SetupTensor<float>(&input_tensor, {1, 3, 32, 32}, static_cast<float>(0),
@@ -29,10 +30,15 @@ int main() {
     std::vector<float> input(input_tensor.data<float>(),
                              input_tensor.data<float>() + input_tensor.numel());
 
-    auto time3 = time();
+    // Warm up once
     paddle_mobile.Predict(input, dims);
+    auto time3 = time();
+    for (int i = 0; i < 10; ++i) {
+      paddle_mobile.Predict(input, dims);
+    }
     auto time4 = time();
-    DLOG << "predict cost :" << time_diff(time3, time4) << "ms";
+    std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms"
+              << std::endl;
   }
 
   return 0;
diff --git a/test/net/test_squeezenet.cpp b/test/net/test_squeezenet.cpp
index 39d4687ff3..4c14f63bde 100644
--- a/test/net/test_squeezenet.cpp
+++ b/test/net/test_squeezenet.cpp
@@ -12,18 +12,19 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include <fstream>
+#include <iostream>
 #include "../test_helper.h"
 #include "../test_include.h"
 
 int main() {
   paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+  paddle_mobile.SetThreadNum(2);
   // ../../../test/models/googlenet
   // ../../../test/models/mobilenet
   auto time1 = time();
-  if (paddle_mobile.Load(g_squeezenet, false)) {
+  if (paddle_mobile.Load(g_squeezenet, true)) {
     auto time2 = time();
-    DLOG << "load cost :" << time_diff(time1, time1) << "ms";
+    std::cout << "load cost :" << time_diff(time1, time2) << "ms" << std::endl;
     std::vector<int64_t> dims{1, 3, 227, 227};
     Tensor input_tensor;
     SetupTensor<float>(&input_tensor, {1, 3, 227, 227}, static_cast<float>(0),
@@ -31,10 +32,15 @@ int main() {
     std::vector<float> input(input_tensor.data<float>(),
                              input_tensor.data<float>() + input_tensor.numel());
 
-    auto time3 = time();
+    // Warm up once
     paddle_mobile.Predict(input, dims);
+    auto time3 = time();
+    for (int i = 0; i < 10; ++i) {
+      paddle_mobile.Predict(input, dims);
+    }
     auto time4 = time();
-    DLOG << "predict cost :" << time_diff(time3, time4) << "ms";
+    std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms"
+              << std::endl;
   }
 
   return 0;
diff --git a/test/net/test_yolo.cpp b/test/net/test_yolo.cpp
index 65dec59ad0..83508cff33 100644
--- a/test/net/test_yolo.cpp
+++ b/test/net/test_yolo.cpp
@@ -12,18 +12,19 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include <fstream>
+#include <iostream>
 #include "../test_helper.h"
 #include "../test_include.h"
 
 int main() {
   paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+  paddle_mobile.SetThreadNum(2);
   // ../../../test/models/googlenet
   // ../../../test/models/mobilenet
   auto time1 = time();
-  if (paddle_mobile.Load(g_yolo, false)) {
+  if (paddle_mobile.Load(g_yolo, true)) {
     auto time2 = time();
-    DLOG << "load cost :" << time_diff(time1, time1) << "ms";
+    std::cout << "load cost :" << time_diff(time1, time2) << "ms" << std::endl;
     std::vector<int64_t> dims{1, 3, 227, 227};
     Tensor input_tensor;
     SetupTensor<float>(&input_tensor, {1, 3, 227, 227}, static_cast<float>(0),
@@ -32,10 +33,15 @@ int main() {
     std::vector<float> input(input_tensor.data<float>(),
                              input_tensor.data<float>() + input_tensor.numel());
 
-    auto time3 = time();
+    // Warm up once
     paddle_mobile.Predict(input, dims);
+    auto time3 = time();
+    for (int i = 0; i < 10; ++i) {
+      paddle_mobile.Predict(input, dims);
+    }
     auto time4 = time();
-    DLOG << "predict cost :" << time_diff(time3, time4) << "ms";
+    std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms"
+              << std::endl;
   }
   return 0;
 }
-- 
GitLab
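For reference, the measurement pattern the updated tests rely on (one untimed warm-up inference, then the average of 10 timed runs) can be reproduced with a minimal standalone sketch. RunModelOnce below is a hypothetical stand-in for paddle_mobile.Predict(input, dims), and std::chrono stands in for the time()/time_diff() helpers from test_helper.h:

    // Minimal sketch of the warm-up + averaged-timing pattern used above.
    // RunModelOnce() is a hypothetical placeholder for one inference call,
    // e.g. paddle_mobile.Predict(input, dims).
    #include <chrono>
    #include <iostream>

    static void RunModelOnce() {
      // stand-in for a single forward pass
    }

    int main() {
      RunModelOnce();  // warm-up run, excluded from the measurement

      const int kRuns = 10;
      auto start = std::chrono::steady_clock::now();
      for (int i = 0; i < kRuns; ++i) {
        RunModelOnce();
      }
      auto end = std::chrono::steady_clock::now();

      double total_ms =
          std::chrono::duration<double, std::milli>(end - start).count();
      std::cout << "average predict cost: " << total_ms / kRuns << " ms"
                << std::endl;
      return 0;
    }

The warm-up run matters on mobile CPUs because the first inference typically pays one-off costs such as memory allocation and cache warm-up, which would otherwise skew the averaged numbers reported in the README table.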