From 63250a65de59a5d71d38d4d711ef8de766028460 Mon Sep 17 00:00:00 2001
From: zhaojiaying01
Date: Mon, 13 Aug 2018 21:05:05 +0800
Subject: [PATCH] update unit test and performance data of squeezenet and yolo

---
 README.md                               |  6 ++++++
 src/operators/fusion_conv_add_relu_op.h | 12 +++++++-----
 test/common/test_gemm_perf.cpp          |  3 +++
 test/net/test_resnet.cpp                | 16 +++++++++++-----
 test/net/test_squeezenet.cpp            | 16 +++++++++++-----
 test/net/test_yolo.cpp                  | 16 +++++++++++-----
 6 files changed, 49 insertions(+), 20 deletions(-)

diff --git a/README.md b/README.md
index c29165d572..825c417c5f 100644
--- a/README.md
+++ b/README.md
@@ -35,6 +35,12 @@ Paddle-Mobile is a project under the PaddlePaddle organization, a deep learning framework for embedded platforms
 |||||
 |googlenet(v1) arm v7|1 thread|2 threads|4 threads|
 |Kirin 960 (ms)|348.018|242.689|169.998|
+|||||
+|squeezenet arm v7|1 thread|2 threads|4 threads|
+|Kirin 960 (ms)|84.685|56.544|38.833|
+|||||
+|yolo arm v7|1 thread|2 threads|4 threads|
+|Kirin 960 (ms)|131.831|88.990|60.905|
 
 The ARM CPU is paddle-mobile's primary target, and the CPU's generality has always been its advantage. Embedded deep learning requires a large amount of hand-written CPU assembly, and we are coding at full speed to squeeze every bit of acceleration out of the hardware.
 ARM CPU optimization is still in progress; only conventional CPU optimizations are applied so far. On an ARM A73, paddle-mobile arm-v7 currently runs MobileNet 1.0 once in 110+ ms on a single core. This is clearly not our final goal: we are rewriting large parts in assembly, so there is still huge room for further gains. Only armv7 is supported at the moment; armv8 support will be added in the future.
diff --git a/src/operators/fusion_conv_add_relu_op.h b/src/operators/fusion_conv_add_relu_op.h
index cda97ba1a3..e8a9498819 100644
--- a/src/operators/fusion_conv_add_relu_op.h
+++ b/src/operators/fusion_conv_add_relu_op.h
@@ -16,6 +16,8 @@ limitations under the License. */
 
 #pragma once
 
+#include <string>
+#include <vector>
 #include "framework/operator.h"
 #include "framework/program/program-optimize/fusion_op_register.h"
 #include "operators/kernel/conv_add_relu_kernel.h"
@@ -65,11 +67,11 @@ class FusionConvAddReluOp : public framework::OperatorWithKernel<
 #ifdef PADDLE_MOBILE_CPU
 
-//#ifndef CONV_ADD_RELU_REGISTER
-//#define CONV_ADD_RELU_REGISTER
-// static framework::FusionOpRegistrar fusion_conv_add_relu_registrar(new
-// FusionConvAddReluOpMatcher());
-//#endif
+#ifndef CONV_ADD_RELU_REGISTER
+#define CONV_ADD_RELU_REGISTER
+static framework::FusionOpRegistrar fusion_conv_add_relu_registrar(
+    new FusionConvAddReluOpMatcher());
+#endif
 
 #endif
 
 #ifdef PADDLE_MOBILE_MALI_GPU
diff --git a/test/common/test_gemm_perf.cpp b/test/common/test_gemm_perf.cpp
index c505c61fce..386c09d71a 100644
--- a/test/common/test_gemm_perf.cpp
+++ b/test/common/test_gemm_perf.cpp
@@ -14,6 +14,7 @@ limitations under the License. */
 
 #include <iostream>
 #include "../test_helper.h"
+#include "../test_include.h"
 #include "operators/math/gemm.h"
 #include "operators/math/math_function.h"
 
@@ -26,6 +27,8 @@ limitations under the License. */
 #define k 1024
 
 int main() {
+  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+  paddle_mobile.SetThreadNum(4);
   Tensor aa, bb, cc, scale, bias;
   auto aaptr = aa.mutable_data<float>({m, k});
   auto bbptr = bb.mutable_data<float>({k, n});
diff --git a/test/net/test_resnet.cpp b/test/net/test_resnet.cpp
index 883ad95392..73ac88ef77 100644
--- a/test/net/test_resnet.cpp
+++ b/test/net/test_resnet.cpp
@@ -12,16 +12,17 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include <fstream>
+#include <iostream>
 #include "../test_helper.h"
 #include "../test_include.h"
 
 int main() {
   paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+  paddle_mobile.SetThreadNum(4);
   auto time1 = time();
-  if (paddle_mobile.Load(g_resnet, false)) {
+  if (paddle_mobile.Load(g_resnet, true)) {
     auto time2 = time();
-    DLOG << "load cost :" << time_diff(time1, time1) << "ms";
+    std::cout << "load cost :" << time_diff(time1, time2) << "ms" << std::endl;
     std::vector<int64_t> dims{1, 3, 32, 32};
     Tensor input_tensor;
     SetupTensor<float>(&input_tensor, {1, 3, 32, 32}, static_cast<float>(0),
@@ -29,10 +30,15 @@ int main() {
     std::vector<float> input(input_tensor.data<float>(),
                              input_tensor.data<float>() + input_tensor.numel());
 
-    auto time3 = time();
+    // Warm up once
     paddle_mobile.Predict(input, dims);
+    auto time3 = time();
+    for (int i = 0; i < 10; ++i) {
+      paddle_mobile.Predict(input, dims);
+    }
     auto time4 = time();
-    DLOG << "predict cost :" << time_diff(time3, time4) << "ms";
+    std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms"
+              << std::endl;
   }
 
   return 0;
diff --git a/test/net/test_squeezenet.cpp b/test/net/test_squeezenet.cpp
index 39d4687ff3..4c14f63bde 100644
--- a/test/net/test_squeezenet.cpp
+++ b/test/net/test_squeezenet.cpp
@@ -12,18 +12,19 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include <fstream>
+#include <iostream>
 #include "../test_helper.h"
 #include "../test_include.h"
 
 int main() {
   paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+  paddle_mobile.SetThreadNum(2);
   // ../../../test/models/googlenet
   // ../../../test/models/mobilenet
   auto time1 = time();
-  if (paddle_mobile.Load(g_squeezenet, false)) {
+  if (paddle_mobile.Load(g_squeezenet, true)) {
     auto time2 = time();
-    DLOG << "load cost :" << time_diff(time1, time1) << "ms";
+    std::cout << "load cost :" << time_diff(time1, time2) << "ms" << std::endl;
     std::vector<int64_t> dims{1, 3, 227, 227};
     Tensor input_tensor;
     SetupTensor<float>(&input_tensor, {1, 3, 227, 227}, static_cast<float>(0),
@@ -31,10 +32,15 @@ int main() {
     std::vector<float> input(input_tensor.data<float>(),
                              input_tensor.data<float>() + input_tensor.numel());
 
-    auto time3 = time();
+    // Warm up once
     paddle_mobile.Predict(input, dims);
+    auto time3 = time();
+    for (int i = 0; i < 10; ++i) {
+      paddle_mobile.Predict(input, dims);
+    }
     auto time4 = time();
-    DLOG << "predict cost :" << time_diff(time3, time4) << "ms";
+    std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms"
+              << std::endl;
   }
 
   return 0;
diff --git a/test/net/test_yolo.cpp b/test/net/test_yolo.cpp
index 65dec59ad0..83508cff33 100644
--- a/test/net/test_yolo.cpp
+++ b/test/net/test_yolo.cpp
@@ -12,18 +12,19 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include <fstream>
+#include <iostream>
 #include "../test_helper.h"
 #include "../test_include.h"
 
 int main() {
   paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+  paddle_mobile.SetThreadNum(2);
   // ../../../test/models/googlenet
   // ../../../test/models/mobilenet
   auto time1 = time();
-  if (paddle_mobile.Load(g_yolo, false)) {
+  if (paddle_mobile.Load(g_yolo, true)) {
     auto time2 = time();
-    DLOG << "load cost :" << time_diff(time1, time1) << "ms";
+    std::cout << "load cost :" << time_diff(time1, time2) << "ms" << std::endl;
     std::vector<int64_t> dims{1, 3, 227, 227};
     Tensor input_tensor;
     SetupTensor<float>(&input_tensor, {1, 3, 227, 227}, static_cast<float>(0),
@@ -32,10 +33,15 @@ int main() {
     std::vector<float> input(input_tensor.data<float>(),
                              input_tensor.data<float>() + input_tensor.numel());
 
-    auto time3 = time();
+    // Warm up once
     paddle_mobile.Predict(input, dims);
+    auto time3 = time();
+    for (int i = 0; i < 10; ++i) {
+      paddle_mobile.Predict(input, dims);
+    }
     auto time4 = time();
-    DLOG << "predict cost :" << time_diff(time3, time4) << "ms";
+    std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms"
+              << std::endl;
   }
   return 0;
 }
-- 
GitLab
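For reference, the measurement pattern the updated tests rely on (one untimed warm-up inference, then the average of 10 timed runs) can be reproduced with a minimal standalone sketch. RunModelOnce below is a hypothetical stand-in for paddle_mobile.Predict(input, dims), and std::chrono stands in for the time()/time_diff() helpers from test_helper.h:

    // Minimal sketch of the warm-up + averaged-timing pattern used above.
    // RunModelOnce() is a hypothetical placeholder for one inference call,
    // e.g. paddle_mobile.Predict(input, dims).
    #include <chrono>
    #include <iostream>

    static void RunModelOnce() {
      // stand-in for a single forward pass
    }

    int main() {
      RunModelOnce();  // warm-up run, excluded from the measurement

      const int kRuns = 10;
      auto start = std::chrono::steady_clock::now();
      for (int i = 0; i < kRuns; ++i) {
        RunModelOnce();
      }
      auto end = std::chrono::steady_clock::now();

      double total_ms =
          std::chrono::duration<double, std::milli>(end - start).count();
      std::cout << "average predict cost: " << total_ms / kRuns << " ms"
                << std::endl;
      return 0;
    }

The warm-up run matters on mobile CPUs because the first inference typically pays one-off costs such as memory allocation and cache warm-up, which would otherwise skew the averaged numbers reported in the README table.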