diff --git a/README.md b/README.md index 8db78ff27542c04d3e89fcd27ba26385c8f44da1..05a109a81791ac85d975138dcd76f7f71716624a 100644 --- a/README.md +++ b/README.md @@ -35,6 +35,12 @@ Paddle-Moible是PaddlePaddle组织下的项目,是一个致力于嵌入式平 ||||| |googlenet(v1) arm v7|1线程|2线程|4线程| |麒麟960(ms)|348.018|240.304|169.998| +||||| +|squeezenet arm v7|1线程|2线程|4线程| +|麒麟960(ms)|84.685|56.544|38.833| +||||| +|yolo arm v7|1线程|2线程|4线程| +|麒麟960(ms)|131.831|88.990|60.905| arm cpu是paddle-mobile的主要支持方向,cpu的通用性一直是其优势。嵌入式深度学习,需要大量的cpu汇编实现。我们正在紧锣密鼓的编码,为的是能充分硬件的每一点加速能力。 arm cpu的优化工作还在进行中,现在使用了常规的cpu优化。在arm a73上paddle-mobile arm-v7现在单核运行一次mobilenet1.0是110+ms,显然这不是我们的最终目标,我们正在用大量的汇编改写,后续性能仍会有巨大提升空间, 目前只支持armv7, 未来我们也会支持armv8。 diff --git a/src/operators/fusion_conv_add_relu_op.h b/src/operators/fusion_conv_add_relu_op.h index cda97ba1a342e5b9451fd8363643f638792e3579..e8a9498819cae330abbd4a007a6510d89f167114 100644 --- a/src/operators/fusion_conv_add_relu_op.h +++ b/src/operators/fusion_conv_add_relu_op.h @@ -16,6 +16,8 @@ limitations under the License. */ #pragma once +#include +#include #include "framework/operator.h" #include "framework/program/program-optimize/fusion_op_register.h" #include "operators/kernel/conv_add_relu_kernel.h" @@ -65,11 +67,11 @@ class FusionConvAddReluOp : public framework::OperatorWithKernel< #ifdef PADDLE_MOBILE_CPU -//#ifndef CONV_ADD_RELU_REGISTER -//#define CONV_ADD_RELU_REGISTER -// static framework::FusionOpRegistrar fusion_conv_add_relu_registrar(new -// FusionConvAddReluOpMatcher()); -//#endif +#ifndef CONV_ADD_RELU_REGISTER +#define CONV_ADD_RELU_REGISTER +static framework::FusionOpRegistrar fusion_conv_add_relu_registrar( + new FusionConvAddReluOpMatcher()); +#endif #endif #ifdef PADDLE_MOBILE_MALI_GPU diff --git a/test/common/test_gemm_perf.cpp b/test/common/test_gemm_perf.cpp index c505c61fce21775136a368949a451999b97b3069..386c09d71a3d5709842991bffd2e8ea039edc940 100644 --- a/test/common/test_gemm_perf.cpp +++ b/test/common/test_gemm_perf.cpp @@ -14,6 +14,7 @@ limitations under the License. */ #include #include "../test_helper.h" +#include "../test_include.h" #include "operators/math/gemm.h" #include "operators/math/math_function.h" @@ -26,6 +27,8 @@ limitations under the License. */ #define k 1024 int main() { + paddle_mobile::PaddleMobile paddle_mobile; + paddle_mobile.SetThreadNum(4); Tensor aa, bb, cc, scale, bias; auto aaptr = aa.mutable_data({m, k}); auto bbptr = bb.mutable_data({k, n}); diff --git a/test/net/test_resnet.cpp b/test/net/test_resnet.cpp index 883ad95392ad351a2634e1a56ac050f02d8767e6..73ac88ef77b0c02545ef55b6493d4681c61c192d 100644 --- a/test/net/test_resnet.cpp +++ b/test/net/test_resnet.cpp @@ -12,16 +12,17 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include +#include #include "../test_helper.h" #include "../test_include.h" int main() { paddle_mobile::PaddleMobile paddle_mobile; + paddle_mobile.SetThreadNum(4); auto time1 = time(); - if (paddle_mobile.Load(g_resnet, false)) { + if (paddle_mobile.Load(g_resnet, true)) { auto time2 = time(); - DLOG << "load cost :" << time_diff(time1, time1) << "ms"; + std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl; std::vector dims{1, 3, 32, 32}; Tensor input_tensor; SetupTensor(&input_tensor, {1, 3, 32, 32}, static_cast(0), @@ -29,10 +30,15 @@ int main() { std::vector input(input_tensor.data(), input_tensor.data() + input_tensor.numel()); - auto time3 = time(); + // 预热一次 paddle_mobile.Predict(input, dims); + auto time3 = time(); + for (int i = 0; i < 10; ++i) { + paddle_mobile.Predict(input, dims); + } auto time4 = time(); - DLOG << "predict cost :" << time_diff(time3, time4) << "ms"; + std::cout << "predict cost :" << time_diff(time3, time4) << "ms" + << std::endl; } return 0; diff --git a/test/net/test_squeezenet.cpp b/test/net/test_squeezenet.cpp index 39d4687ff3de37c571ee89213485fb0b6bc939df..4c14f63bde40675a7e0016e28d900788431ff2ae 100644 --- a/test/net/test_squeezenet.cpp +++ b/test/net/test_squeezenet.cpp @@ -12,18 +12,19 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include +#include #include "../test_helper.h" #include "../test_include.h" int main() { paddle_mobile::PaddleMobile paddle_mobile; + paddle_mobile.SetThreadNum(2); // ../../../test/models/googlenet // ../../../test/models/mobilenet auto time1 = time(); - if (paddle_mobile.Load(g_squeezenet, false)) { + if (paddle_mobile.Load(g_squeezenet, true)) { auto time2 = time(); - DLOG << "load cost :" << time_diff(time1, time1) << "ms"; + std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl; std::vector dims{1, 3, 227, 227}; Tensor input_tensor; SetupTensor(&input_tensor, {1, 3, 227, 227}, static_cast(0), @@ -31,10 +32,15 @@ int main() { std::vector input(input_tensor.data(), input_tensor.data() + input_tensor.numel()); - auto time3 = time(); + // 预热一次 paddle_mobile.Predict(input, dims); + auto time3 = time(); + for (int i = 0; i < 10; ++i) { + paddle_mobile.Predict(input, dims); + } auto time4 = time(); - DLOG << "predict cost :" << time_diff(time3, time4) << "ms"; + std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms" + << std::endl; } return 0; diff --git a/test/net/test_yolo.cpp b/test/net/test_yolo.cpp index 65dec59ad0579d362c75ae6ec1d362fb957d4fc5..83508cff335c55f5cc416c6652d83706a4626c1a 100644 --- a/test/net/test_yolo.cpp +++ b/test/net/test_yolo.cpp @@ -12,18 +12,19 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include +#include #include "../test_helper.h" #include "../test_include.h" int main() { paddle_mobile::PaddleMobile paddle_mobile; + paddle_mobile.SetThreadNum(2); // ../../../test/models/googlenet // ../../../test/models/mobilenet auto time1 = time(); - if (paddle_mobile.Load(g_yolo, false)) { + if (paddle_mobile.Load(g_yolo, true)) { auto time2 = time(); - DLOG << "load cost :" << time_diff(time1, time1) << "ms"; + std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl; std::vector dims{1, 3, 227, 227}; Tensor input_tensor; @@ -32,10 +33,15 @@ int main() { std::vector input(input_tensor.data(), input_tensor.data() + input_tensor.numel()); - auto time3 = time(); + // 预热一次 paddle_mobile.Predict(input, dims); + auto time3 = time(); + for (int i = 0; i < 10; ++i) { + paddle_mobile.Predict(input, dims); + } auto time4 = time(); - DLOG << "predict cost :" << time_diff(time3, time4) << "ms"; + std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms" + << std::endl; } return 0; }