diff --git a/README.md b/README.md index c9d15d4960a6330ff6614b6dfc8fd20b81386c9c..59ef597dd749ea16658977cd6d548cedaa90d166 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ Paddle-Moible是PaddlePaddle组织下的项目,是一个致力于嵌入式平 - **ARM CPU** -![](http://mms-graph.bj.bcebos.com/paddle-mobile%2F2018_07_18.png) +![](http://mms-graph.bj.bcebos.com/paddle-mobile%2F2018_07_29.png) arm cpu是paddle-mobile的主要支持方向,cpu的通用性一直是其优势。嵌入式深度学习,需要大量的cpu汇编实现。我们正在紧锣密鼓的编码,为的是能充分硬件的每一点加速能力。 arm cpu的优化工作还在进行中,现在使用了常规的cpu优化。在arm a73上paddle-mobile arm-v7现在单核运行一次mobilenet1.0是110+ms,显然这不是我们的最终目标,我们正在用大量的汇编改写,后续性能仍会有巨大提升空间, 目前只支持armv7, 未来我们也会支持armv8。 diff --git a/src/common/types.cpp b/src/common/types.cpp index 14924c4a2129292aca32e307569fc8dc9a00f913..2f366eb9e5a10ea11e3153e6e32b18204c6dd9cd 100644 --- a/src/common/types.cpp +++ b/src/common/types.cpp @@ -17,39 +17,39 @@ limitations under the License. */ namespace paddle_mobile { -const std::string G_OP_TYPE_CONV = "conv2d"; -const std::string G_OP_TYPE_BATCHNORM = "batch_norm"; -const std::string G_OP_TYPE_BOX_CODER = "box_coder"; -const std::string G_OP_TYPE_CONCAT = "concat"; -const std::string G_OP_TYPE_ELEMENTWISE_ADD = "elementwise_add"; -const std::string G_OP_TYPE_FUSION_CONV_ADD_RELU = "fusion_conv_add_relu"; -const std::string G_OP_TYPE_FUSION_CONV_ADD_BN_RELU = "fusion_conv_add_bn_relu"; -const std::string G_OP_TYPE_FUSION_DWCONV_BN_RELU = "fusion_dwconv_bn_relu"; -const std::string G_OP_TYPE_FUSION_CONV_BN_RELU = "fusion_conv_bn_relu"; -const std::string G_OP_TYPE_FC = "fusion_fc"; -const std::string G_OP_TYPE_FUSION_CONV_ADD = "fusion_conv_add"; -const std::string G_OP_TYPE_LRN = "lrn"; -const std::string G_OP_TYPE_MUL = "mul"; -const std::string G_OP_TYPE_MULTICLASS_NMS = "multiclass_nms"; -const std::string G_OP_TYPE_POOL2D = "pool2d"; -const std::string G_OP_TYPE_PRIOR_BOX = "prior_box"; -const std::string G_OP_TYPE_RELU = "relu"; -const std::string G_OP_TYPE_RESHAPE = "reshape"; -const std::string G_OP_TYPE_SIGMOID = "sigmoid"; 
-const std::string G_OP_TYPE_SOFTMAX = "softmax"; -const std::string G_OP_TYPE_TRANSPOSE = "transpose"; -const std::string G_OP_TYPE_SPLIT = "split"; -const std::string G_OP_TYPE_FEED = "feed"; -const std::string G_OP_TYPE_FETCH = "fetch"; -const std::string G_OP_TYPE_DEPTHWISE_CONV = "depthwise_conv2d"; -const std::string G_OP_TYPE_IM2SEQUENCE = "im2sequence"; -const std::string G_OP_TYPE_DROPOUT = "dropout"; -const std::string G_OP_TYPE_FUSION_CONV_ADD_BN = "fusion_conv_add_bn"; -const std::string G_OP_TYPE_FUSION_POOL_BN = "fusion_pool_bn"; -const std::string G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU = +const char *G_OP_TYPE_CONV = "conv2d"; +const char *G_OP_TYPE_BATCHNORM = "batch_norm"; +const char *G_OP_TYPE_BOX_CODER = "box_coder"; +const char *G_OP_TYPE_CONCAT = "concat"; +const char *G_OP_TYPE_ELEMENTWISE_ADD = "elementwise_add"; +const char *G_OP_TYPE_FUSION_CONV_ADD_RELU = "fusion_conv_add_relu"; +const char *G_OP_TYPE_FUSION_CONV_ADD_BN_RELU = "fusion_conv_add_bn_relu"; +const char *G_OP_TYPE_FUSION_DWCONV_BN_RELU = "fusion_dwconv_bn_relu"; +const char *G_OP_TYPE_FUSION_CONV_BN_RELU = "fusion_conv_bn_relu"; +const char *G_OP_TYPE_FC = "fusion_fc"; +const char *G_OP_TYPE_FUSION_CONV_ADD = "fusion_conv_add"; +const char *G_OP_TYPE_LRN = "lrn"; +const char *G_OP_TYPE_MUL = "mul"; +const char *G_OP_TYPE_MULTICLASS_NMS = "multiclass_nms"; +const char *G_OP_TYPE_POOL2D = "pool2d"; +const char *G_OP_TYPE_PRIOR_BOX = "prior_box"; +const char *G_OP_TYPE_RELU = "relu"; +const char *G_OP_TYPE_RESHAPE = "reshape"; +const char *G_OP_TYPE_SIGMOID = "sigmoid"; +const char *G_OP_TYPE_SOFTMAX = "softmax"; +const char *G_OP_TYPE_TRANSPOSE = "transpose"; +const char *G_OP_TYPE_SPLIT = "split"; +const char *G_OP_TYPE_FEED = "feed"; +const char *G_OP_TYPE_FETCH = "fetch"; +const char *G_OP_TYPE_DEPTHWISE_CONV = "depthwise_conv2d"; +const char *G_OP_TYPE_IM2SEQUENCE = "im2sequence"; +const char *G_OP_TYPE_DROPOUT = "dropout"; +const char *G_OP_TYPE_FUSION_CONV_ADD_BN = 
"fusion_conv_add_bn"; +const char *G_OP_TYPE_FUSION_POOL_BN = "fusion_pool_bn"; +const char *G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU = "fusion_elementwise_add_relu"; -const std::string G_OP_TYPE_FUSION_FC_RELU = "fusion_fc_relu"; -const std::string G_OP_TYPE_REGION = "region"; +const char *G_OP_TYPE_FUSION_FC_RELU = "fusion_fc_relu"; +const char *G_OP_TYPE_REGION = "region"; std::unordered_map< std::string, std::pair, std::vector>> diff --git a/src/common/types.h b/src/common/types.h index ae993f8034d7136a4badac2bbaf0353c6ef05222..7745f80a9ca2ef6f0258f6f2eacf45761d29a00e 100644 --- a/src/common/types.h +++ b/src/common/types.h @@ -73,40 +73,40 @@ enum PMStatus { PMWrongDevice = 0x08 /*!< un-correct device. */ }; -extern const std::string G_OP_TYPE_CONV; -extern const std::string G_OP_TYPE_BATCHNORM; -extern const std::string G_OP_TYPE_BOX_CODER; -extern const std::string G_OP_TYPE_CONCAT; -extern const std::string G_OP_TYPE_ELEMENTWISE_ADD; -extern const std::string G_OP_TYPE_FUSION_CONV_ADD_RELU; -extern const std::string G_OP_TYPE_FC; -extern const std::string G_OP_TYPE_FUSION_CONV_ADD; -extern const std::string G_OP_TYPE_FUSION_CONV_ADD_BN_RELU; -extern const std::string G_OP_TYPE_FUSION_DWCONV_BN_RELU; -extern const std::string G_OP_TYPE_FUSION_CONV_BN_RELU; - -extern const std::string G_OP_TYPE_LRN; -extern const std::string G_OP_TYPE_MUL; -extern const std::string G_OP_TYPE_MULTICLASS_NMS; -extern const std::string G_OP_TYPE_POOL2D; -extern const std::string G_OP_TYPE_PRIOR_BOX; -extern const std::string G_OP_TYPE_RELU; -extern const std::string G_OP_TYPE_RESHAPE; -extern const std::string G_OP_TYPE_SIGMOID; -extern const std::string G_OP_TYPE_SOFTMAX; -extern const std::string G_OP_TYPE_TRANSPOSE; -extern const std::string G_OP_TYPE_SPLIT; -extern const std::string G_OP_TYPE_FEED; -extern const std::string G_OP_TYPE_FETCH; -extern const std::string G_OP_TYPE_DEPTHWISE_CONV; -extern const std::string G_OP_TYPE_IM2SEQUENCE; -extern const std::string 
G_OP_TYPE_DROPOUT; - -extern const std::string G_OP_TYPE_FUSION_CONV_ADD_BN; -extern const std::string G_OP_TYPE_FUSION_POOL_BN; -extern const std::string G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU; -extern const std::string G_OP_TYPE_FUSION_FC_RELU; -extern const std::string G_OP_TYPE_REGION; +extern const char *G_OP_TYPE_CONV; +extern const char *G_OP_TYPE_BATCHNORM; +extern const char *G_OP_TYPE_BOX_CODER; +extern const char *G_OP_TYPE_CONCAT; +extern const char *G_OP_TYPE_ELEMENTWISE_ADD; +extern const char *G_OP_TYPE_FUSION_CONV_ADD_RELU; +extern const char *G_OP_TYPE_FC; +extern const char *G_OP_TYPE_FUSION_CONV_ADD; +extern const char *G_OP_TYPE_FUSION_CONV_ADD_BN_RELU; +extern const char *G_OP_TYPE_FUSION_DWCONV_BN_RELU; +extern const char *G_OP_TYPE_FUSION_CONV_BN_RELU; + +extern const char *G_OP_TYPE_LRN; +extern const char *G_OP_TYPE_MUL; +extern const char *G_OP_TYPE_MULTICLASS_NMS; +extern const char *G_OP_TYPE_POOL2D; +extern const char *G_OP_TYPE_PRIOR_BOX; +extern const char *G_OP_TYPE_RELU; +extern const char *G_OP_TYPE_RESHAPE; +extern const char *G_OP_TYPE_SIGMOID; +extern const char *G_OP_TYPE_SOFTMAX; +extern const char *G_OP_TYPE_TRANSPOSE; +extern const char *G_OP_TYPE_SPLIT; +extern const char *G_OP_TYPE_FEED; +extern const char *G_OP_TYPE_FETCH; +extern const char *G_OP_TYPE_DEPTHWISE_CONV; +extern const char *G_OP_TYPE_IM2SEQUENCE; +extern const char *G_OP_TYPE_DROPOUT; + +extern const char *G_OP_TYPE_FUSION_CONV_ADD_BN; +extern const char *G_OP_TYPE_FUSION_POOL_BN; +extern const char *G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU; +extern const char *G_OP_TYPE_FUSION_FC_RELU; +extern const char *G_OP_TYPE_REGION; extern std::unordered_map< std::string, std::pair, std::vector>> diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index f747b8202c07b50511b252cc8217d1a4be7c37a9..418ebff79161675e8b23a4cca8f4319121aa6002 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -145,6 +145,10 @@ else () ADD_EXECUTABLE(test-conv-add-relu-op 
operators/test_conv_add_relu_op.cpp test_helper.h test_include.h executor_for_test.h) target_link_libraries(test-conv-add-relu-op paddle-mobile) + # gen test + ADD_EXECUTABLE(test-conv-add-bn-relu-op operators/test_fusion_conv_add_bn_relu_op.cpp test_helper.h test_include.h executor_for_test.h) + target_link_libraries(test-conv-add-bn-relu-op paddle-mobile) + #add_library(test-lib-size SHARED common/test_lib_size.h common/test_lib_size.cpp) endif() diff --git a/test/executor_for_test.h b/test/executor_for_test.h index c9ab4783d6826992ee81ffd63b0391169645576c..93847af20a6d48a6df33dc50f6c6a1db76facf51 100644 --- a/test/executor_for_test.h +++ b/test/executor_for_test.h @@ -43,7 +43,7 @@ template class Executor4Test : public Executor { public: Executor4Test(Program p, string op_type, - bool use_optimize = false) + bool use_optimize = false, int predict_op_count = 1) : Executor() { this->use_optimize_ = use_optimize; this->program_ = p; @@ -57,12 +57,14 @@ class Executor4Test : public Executor { LOG(paddle_mobile::LogLevel::kLOG_ERROR) << "to_predict_program_ == nullptr"; } + const std::vector> blocks = this->to_predict_program_->Blocks(); for (std::shared_ptr block_desc : blocks) { std::vector> ops = block_desc->Ops(); - for (std::shared_ptr op : ops) { - if (op->Type() == op_type) { + for (int i = 0; i < ops.size(); ++i) { + auto op = ops[i]; + if (op->Type() == op_type && i < predict_op_count) { DLOG << "匹配到: " << op->Type(); /// test first meeting op in program @@ -72,11 +74,17 @@ class Executor4Test : public Executor { op->Type(), op->GetInputs(), op->GetOutputs(), op->GetAttrMap(), this->program_.scope); this->ops_of_block_[*block_desc.get()].push_back(op_ptr); - break; } } } this->InitMemory(); + + std::shared_ptr to_predict_block = + this->to_predict_program_->Block(0); + auto &ops = this->ops_of_block_[*to_predict_block.get()]; + for (const auto &op : ops) { + op->Init(); + } } template @@ -130,9 +138,6 @@ class Executor4Test : public Executor { auto 
*output_tensor = con_output->GetMutable(); output_tensor->mutable_data(dDim); - std::shared_ptr out_tensor = std::make_shared(); - out_tensor.reset(output_tensor); - std::shared_ptr to_predict_block = this->to_predict_program_->Block(0); for (int j = 0; j < this->ops_of_block_[*to_predict_block.get()].size(); @@ -141,6 +146,7 @@ class Executor4Test : public Executor { op->Run(); } - return out_tensor; + return std::make_shared( + paddle_mobile::framework::Tensor(*output_tensor)); } }; diff --git a/test/net/test_mobilenet+ssd.cpp b/test/net/test_mobilenet+ssd.cpp index a3d780a4854d018f948af2890bfe9f1e7a8fefef..9b4e5f2d3a431001e138977b78994f5dfedbe0a3 100644 --- a/test/net/test_mobilenet+ssd.cpp +++ b/test/net/test_mobilenet+ssd.cpp @@ -20,22 +20,20 @@ int main() { paddle_mobile::PaddleMobile paddle_mobile; paddle_mobile.SetThreadNum(4); auto time1 = time(); - auto isok = paddle_mobile.Load(g_mobilenet_ssd_gesture + "/model", - g_mobilenet_ssd_gesture + "/params", true); + auto isok = paddle_mobile.Load( + std::string(g_mobilenet_ssd_gesture) + "/model", + std::string(g_mobilenet_ssd_gesture) + "/params", true); // auto isok = paddle_mobile.Load(g_mobilenet_ssd, false); if (isok) { auto time2 = time(); std::cout << "load cost :" << time_diff(time1, time2) << "ms" << std::endl; + std::vector input; std::vector dims{1, 3, 300, 300}; - Tensor input_tensor; - SetupTensor(&input_tensor, {1, 3, 300, 300}, static_cast(0), - static_cast(1)); + GetInput(g_hand, &input, dims); - std::vector input(input_tensor.data(), - input_tensor.data() + input_tensor.numel()); auto time3 = time(); - paddle_mobile.Predict(input, dims); + auto output = paddle_mobile.Predict(input, dims); auto time4 = time(); std::cout << "predict cost :" << time_diff(time3, time4) << "ms" << std::endl; diff --git a/test/net/test_mobilenet.cpp b/test/net/test_mobilenet.cpp index 95ffc59c394782b69d17f16c549b0e6923fd31e8..9fc7226fc12fa7a0c631c9920487c0bd56c90816 100644 --- a/test/net/test_mobilenet.cpp +++ 
b/test/net/test_mobilenet.cpp @@ -24,19 +24,21 @@ int main() { auto time2 = time(); std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl; + std::vector input; std::vector dims{1, 3, 224, 224}; - Tensor input_tensor; - SetupTensor(&input_tensor, {1, 3, 224, 224}, static_cast(0), - static_cast(1)); - - std::vector input(input_tensor.data(), - input_tensor.data() + input_tensor.numel()); - auto time3 = time(); - auto vec_result = paddle_mobile.Predict(input, dims); - auto time4 = time(); - - std::cout << "predict cost :" << time_diff(time3, time4) << "ms" - << std::endl; + GetInput(g_test_image_1x3x224x224, &input, dims); + + for (int i = 0; i < 10; ++i) { + auto time3 = time(); + auto vec_result = paddle_mobile.Predict(input, dims); + auto time4 = time(); + std::vector::iterator biggest = + std::max_element(std::begin(vec_result), std::end(vec_result)); + std::cout << " Max element is " << *biggest << " at position " + << std::distance(std::begin(vec_result), biggest) << std::endl; + std::cout << "predict cost :" << time_diff(time3, time4) << "ms" + << std::endl; + } } return 0; diff --git a/test/operators/test_fusion_conv_add_bn_relu_op.cpp b/test/operators/test_fusion_conv_add_bn_relu_op.cpp new file mode 100644 index 0000000000000000000000000000000000000000..81400d987195364c06b4b93d0859469b43f90e7b --- /dev/null +++ b/test/operators/test_fusion_conv_add_bn_relu_op.cpp @@ -0,0 +1,62 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. */ + +#include "../test_include.h" +#include "operators/fusion_conv_add_bn_relu_op.h" +// Smoke test: run fusion_conv_add_bn_relu via Executor4Test, log 100 outputs. +int main() { + paddle_mobile::Loader loader; + // g_mobilenet resolves to ../models/mobilenet (declared in test_helper.h) + auto program = loader.Load(g_mobilenet, true); + + PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr, + "program file read fail"); + + Executor4Test> + executor(program, "fusion_conv_add_bn_relu", true); + + std::cout << "executor 4 test: " << std::endl; + + paddle_mobile::framework::Tensor input; + GetInput(g_test_image_1x3x224x224_banana, &input, {1, 3, 224, 224}); + // // Use SetupTensor instead when no local input image is available. + // SetupTensor(&input, {1, 3, 224, 224}, static_cast(0), + // static_cast(1)); + + DLOG << " input: " << input; + + auto out_ddim = paddle_mobile::framework::make_ddim({1, 32, 112, 112}); + std::cout << "before predict: " << std::endl; + auto output = + executor.Predict(input, "data", "conv2_1_dw_bn.tmp_2", out_ddim); + std::cout << "after predict " << std::endl; + auto output_ptr = output->data(); + + int stride = output->numel() / 100; + for (int i = 0; i < 100; i++) { + DLOG << " index:" << i * stride << " value: " << output_ptr[i * stride]; + } + + // for (int i = 0; i < 100; i++) { + // DLOG << " index:" << i << " value: "<< output_ptr[i]; + // } + + // for (int j = 0; j < output->numel(); ++j) { + // std::cout << " (index: " << j << " value: " << output_ptr[j] << ") "; + // } + std::cout << std::endl; + return 0; +} diff --git a/test/test_helper.h b/test/test_helper.h index fb6724f9c5764497ec81de0d73406709f098e0e0..9a5c62c79c44fdf52657ea5facb5f0768810c440 100644 --- a/test/test_helper.h +++ b/test/test_helper.h @@ -24,18 +24,21 @@ limitations under the License.
*/ #include "framework/ddim.h" #include "framework/tensor.h" -static const std::string g_mobilenet_ssd = "../models/mobilenet+ssd"; -static const std::string g_mobilenet_ssd_gesture = - "../models/mobilenet+ssd_gesture"; -static const std::string g_squeezenet = "../models/squeezenet"; -static const std::string g_googlenet = "../models/googlenet"; -static const std::string g_mobilenet = "../models/mobilenet"; -static const std::string g_resnet_50 = "../models/resnet_50"; -static const std::string g_resnet = "../models/resnet"; -static const std::string g_googlenet_combine = "../models/googlenet_combine"; -static const std::string g_yolo = "../models/yolo"; -static const std::string g_test_image_1x3x224x224 = +static const char *g_mobilenet_ssd = "../models/mobilenet+ssd"; +static const char *g_mobilenet_ssd_gesture = "../models/mobilenet+ssd_gesture"; +static const char *g_squeezenet = "../models/squeezenet"; +static const char *g_googlenet = "../models/googlenet"; +static const char *g_mobilenet = "../models/mobilenet"; +static const char *g_resnet_50 = "../models/resnet_50"; +static const char *g_resnet = "../models/resnet"; +static const char *g_googlenet_combine = "../models/googlenet_combine"; +static const char *g_yolo = "../models/yolo"; +static const char *g_test_image_1x3x224x224 = "../images/test_image_1x3x224x224_float"; +static const char *g_test_image_1x3x224x224_banana = + "../images/input_3x224x224_banana"; +static const char *g_hand = "../images/hand_image"; + using paddle_mobile::framework::DDim; using paddle_mobile::framework::Tensor;