diff --git a/src/common/types.cpp b/src/common/types.cpp
index 41bbfa5256a1d55ac9c8ebe3ba695c4a6f1be720..e06e9965c4108988ed9e6675f7a012631e81049f 100644
--- a/src/common/types.cpp
+++ b/src/common/types.cpp
@@ -51,6 +51,8 @@ const char *G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU =
 const char *G_OP_TYPE_FUSION_FC_RELU = "fusion_fc_relu";
 const char *G_OP_TYPE_REGION = "region";
 const char *G_OP_TYPE_FUSION_CONV_BN = "fusion_conv_bn";
+const char *G_OP_TYPE_CONV_TRANSPOSE = "conv2d_transpose";
+const char *G_OP_TYPE_PRELU = "prelu";
 
 std::unordered_map<
     std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
@@ -58,6 +60,7 @@ std::unordered_map<
     {G_OP_TYPE_CONV, {{"Input"}, {"Output"}}},
     {G_OP_TYPE_FUSION_DWCONV_BN_RELU, {{"Input"}, {"Out"}}},
     {G_OP_TYPE_FUSION_CONV_BN_RELU, {{"Input"}, {"Out"}}},
+    {G_OP_TYPE_PRELU, {{"X", "Alpha"}, {"Out"}}},
     {G_OP_TYPE_FUSION_CONV_ADD, {{"Input"}, {"Out"}}},
     {G_OP_TYPE_RELU, {{"X"}, {"Out"}}},
     {G_OP_TYPE_SOFTMAX, {{"X"}, {"Out"}}},
@@ -87,6 +90,7 @@ std::unordered_map<
     {G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU, {{"X", "Y"}, {"Out"}}},
     {G_OP_TYPE_FUSION_FC_RELU, {{"X", "Y", "Z"}, {"Out"}}},
     {G_OP_TYPE_REGION, {{"X"}, {"Out"}}},
-    {G_OP_TYPE_FUSION_CONV_BN, {{"Input"}, {"Y"}}}};
+    {G_OP_TYPE_FUSION_CONV_BN, {{"Input"}, {"Y"}}},
+    {G_OP_TYPE_CONV_TRANSPOSE, {{"Input"}, {"Output"}}}};
 
 }  // namespace paddle_mobile
diff --git a/src/common/types.h b/src/common/types.h
index 78c96f327a5f483ebee9d56bf338a4415542fbde..bab169977135ce4f572bf4242837ed39588cc97b 100644
--- a/src/common/types.h
+++ b/src/common/types.h
@@ -114,6 +114,8 @@ extern const char *G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU;
 extern const char *G_OP_TYPE_FUSION_FC_RELU;
 extern const char *G_OP_TYPE_REGION;
 extern const char *G_OP_TYPE_FUSION_CONV_BN;
+extern const char *G_OP_TYPE_CONV_TRANSPOSE;
+extern const char *G_OP_TYPE_PRELU;
 
 extern std::unordered_map<
     std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
diff --git a/src/operators/conv_transpose_op.cpp b/src/operators/conv_transpose_op.cpp
index 1e1d9e9c519732607b27aac7873b6a8eec93510b..34de4cbb10d3689f0be95f1277cfdd76b4c2c141 100644
--- a/src/operators/conv_transpose_op.cpp
+++ b/src/operators/conv_transpose_op.cpp
@@ -20,4 +20,13 @@ namespace paddle_mobile {
 namespace operators {}
 }  // namespace paddle_mobile
 
+namespace ops = paddle_mobile::operators;
+#ifdef PADDLE_MOBILE_CPU
+REGISTER_OPERATOR_CPU(conv2d_transpose, ops::ConvOpTranspose);
+#endif
+#ifdef PADDLE_MOBILE_MALI_GPU
+#endif
+#ifdef PADDLE_MOBILE_FPGA
+#endif
+
 #endif
diff --git a/src/operators/conv_transpose_op.h b/src/operators/conv_transpose_op.h
index 8176913b0d9535f7bd677439f314137e9d59ad72..81fa6916477fc1be99fe3daf5933a1ca7c726363 100644
--- a/src/operators/conv_transpose_op.h
+++ b/src/operators/conv_transpose_op.h
@@ -88,4 +88,14 @@ class ConvOpTranspose : public framework::OperatorWithKernel<
 }  // namespace operators
 }  // namespace paddle_mobile
 
+#ifdef PADDLE_MOBILE_CPU
+USE_OP_CPU(conv2d_transpose);
+#endif
+#ifdef PADDLE_MOBILE_MALI_GPU
+USE_OP_MALI_GPU(conv2d_transpose);
+#endif
+#ifdef PADDLE_MOBILE_FPGA
+USE_OP_FPGA(conv2d_transpose);
+#endif
+
 #endif
diff --git a/src/operators/kernel/arm/prelu_kernel.cpp b/src/operators/kernel/arm/prelu_kernel.cpp
index a83783a078f4ec680fbab238a2839226546f894c..a6b2606a1b5bced4e0e3e429223af57cee316141 100644
--- a/src/operators/kernel/arm/prelu_kernel.cpp
+++ b/src/operators/kernel/arm/prelu_kernel.cpp
@@ -33,77 +33,36 @@ struct PReluFunctor {
  * */
 template <>
 void PReluKernel<CPU, float>::Compute(const PReluParam &param) const {
-  const auto *input_x = param.InputX();
-  auto *input_x_ptr = input_x->data<float>();
-  auto *out = param.Out();
-  auto *out_ptr = out->mutable_data<float>();
-
-  if (param.Slopes().size() == 1) {
-    PReluFunctor<float> func_(param.Slopes()[0]);
-    math::Transform trans;
-    trans(input_x_ptr, input_x_ptr + input_x->numel(), out_ptr, func_);
-  } else if (param.Slopes().size() > 1) {
-    const int dim_size = input_x->dims().size();
-    switch (dim_size) {
-      case 0:
-        break;
-      case 1: {
-        const int input_width = input_x->dims()[0];
-        math::Transform trans;
-
-#pragma omp parallel for
-        for (int w = 0; w < input_width; ++w) {
-          out_ptr[w] = input_x_ptr[w] * param.Slopes()[w];
-        }
-      } break;
-      case 2: {
-        const int input_height = input_x->dims()[0];
-        const int input_width = input_x->dims()[1];
-
-        math::Transform trans;
-#pragma omp parallel for
-        for (int h = 0; h < input_height; ++h) {
-          PReluFunctor<float> func_(param.Slopes()[h]);
-          const float *ptr = input_x_ptr + h * input_width;
-          float *optr = out_ptr + h * input_width;
-          trans(ptr, ptr + input_width, optr, func_);
-        }
-      } break;
-      case 3: {
-        const int chan_size = input_x->dims()[0];
-        const int input_height = input_x->dims()[1];
-        const int input_width = input_x->dims()[2];
-
-        math::Transform trans;
-#pragma omp parallel for
-        for (int c = 0; c < chan_size; ++c) {
-          PReluFunctor<float> func_(param.Slopes()[c]);
-          int size = input_height * input_width;
-          const float *ptr = input_x_ptr + c * size;
-          float *optr = out_ptr + c * size;
-          trans(ptr, ptr + size, optr, func_);
-        }
-      } break;
-      case 4:
-      default: {
-        const int batch_size = input_x->dims()[0];
-        const int chan_size = input_x->dims()[1];
-        const int input_height = input_x->dims()[2];
-        const int input_width = input_x->dims()[3];
-        math::Transform trans;
-
-#pragma omp parallel for
-        for (int b = 0; b < batch_size; ++b) {
-          for (int c = 0; c < chan_size; ++c) {
-            PReluFunctor<float> func_(param.Slopes()[c]);
-            int size = input_height * input_width;
-            const float *ptr = input_x_ptr + b * c * size;
-            float *optr = out_ptr + b * c * size;
-            trans(ptr, ptr + size, optr, func_);
-          }
-        }
-      }  // case 3,default
-        break;
-    }
-  }
+  DLOG << "PReluKernel: Compute";
+
+  auto *x = param.InputX();
+  auto *alpha = param.InputAlpha();
+  auto *out = param.Out();
+  std::string mode = param.Mode();
+  const auto *x_ptr = x->data<float>();
+  auto *o_ptr = out->mutable_data<float>();
+  const auto *alpha_ptr = alpha->data<float>();
+  int numel = x->numel();
+  auto dim = x->dims();
+  if (mode == "channel") {
+    // NCHW: numel / (N * C) elements share each channel slope.
+    const int spatial = numel / (dim[0] * dim[1]);
+#pragma omp parallel for
+    for (int i = 0; i < numel; i++) {
+      const int index = (i / spatial) % dim[1];  // channel of element i
+      o_ptr[i] = x_ptr[i] > 0 ? x_ptr[i] : alpha_ptr[index] * x_ptr[i];
+    }
+  } else if (mode == "element") {
+    // One slope per element: Alpha has the same shape as X.
+#pragma omp parallel for
+    for (int i = 0; i < numel; i++) {
+      o_ptr[i] = x_ptr[i] > 0 ? x_ptr[i] : alpha_ptr[i] * x_ptr[i];
+    }
+  } else {
+    // "all": a single slope shared by the whole tensor.
+#pragma omp parallel for
+    for (int i = 0; i < numel; i++) {
+      o_ptr[i] = x_ptr[i] > 0 ? x_ptr[i] : alpha_ptr[0] * x_ptr[i];
+    }
+  }
 }
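The only subtle piece of the new kernel is the channel-mode index math: for NCHW input, numel / (dim[0] * dim[1]) is the number of elements that share one channel slope, and (i / spatial) % dim[1] recovers the channel of flat index i. Below is a minimal, framework-free sketch of all three modes for cross-checking the kernel; prelu_reference and its parameter names are illustrative, not part of the patch:

#include <string>
#include <vector>

// Reference PRelu over NCHW float data (illustrative, not part of the patch).
void prelu_reference(const std::vector<float> &x,
                     const std::vector<float> &alpha, int n, int c, int h,
                     int w, const std::string &mode, std::vector<float> *out) {
  const int numel = n * c * h * w;
  const int spatial = h * w;  // elements per channel
  for (int i = 0; i < numel; ++i) {
    float a;
    if (mode == "channel") {
      a = alpha[(i / spatial) % c];  // channel index of flat element i
    } else if (mode == "element") {
      a = alpha[i];  // one slope per element
    } else {
      a = alpha[0];  // "all": one shared slope
    }
    (*out)[i] = x[i] > 0 ? x[i] : a * x[i];
  }
}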
diff --git a/src/operators/op_param.h b/src/operators/op_param.h
index 1ed5b4c8a8d3666236dadacb6d92f66f9e74889a..06da537e419f3a54ffc9986b12274f9853f12774 100644
--- a/src/operators/op_param.h
+++ b/src/operators/op_param.h
@@ -39,6 +39,11 @@ using std::vector;
 
 class OpParam {
  protected:
+  template <typename T>
+  static T *InputAlphaFrom(const VariableNameMap &inputs, const Scope &scope) {
+    return GetVarValue<T>("Alpha", inputs, scope);
+  }
+
   template <typename T>
   static T *InputFrom(const VariableNameMap &inputs, const Scope &scope) {
     return GetVarValue<T>("Input", inputs, scope);
@@ -895,19 +900,24 @@ class PReluParam : public OpParam {
  public:
   PReluParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
              const AttributeMap &attrs, const Scope &scope) {
+    DLOG << "PReluParam: parsing inputs";
     input_x_ = InputXFrom<Tensor>(inputs, scope);
+    alpha_ = InputAlphaFrom<Tensor>(inputs, scope);
+    framework::DDim dims = alpha_->dims();
     out_ = OutFrom<Tensor>(outputs, scope);
-    slopes_ = GetAttr<vector<float>>("slopes", attrs);
+    mode_ = GetAttr<std::string>("mode", attrs);
+    DLOG << "PReluParam mode: " << mode_;
   }
   const Tensor *InputX() const { return input_x_; }
+  const Tensor *InputAlpha() const { return alpha_; }
   Tensor *Out() const { return out_; }
-  const vector<float> &Slopes() const { return slopes_; }
+  const std::string &Mode() const { return mode_; }
 
  private:
   Tensor *input_x_;
   Tensor *out_;
-  vector<float> slopes_;
+  Tensor *alpha_;
+  std::string mode_;
 };
 #endif
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 3315d39ebe04d295a4325fdf56ca203b7d303742..8f92b6dab9e5c2c51c485f61fa2860926ce50b1f 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -159,6 +159,10 @@ else ()
     ADD_EXECUTABLE(test-mobilenet-combine net/test_mobilenet_combine.cpp test_helper.h test_include.h executor_for_test.h)
     target_link_libraries(test-mobilenet-combine paddle-mobile)
 
+    # gen test
+    ADD_EXECUTABLE(test-genet net/test_genet_combine.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-genet paddle-mobile)
+
     # gen test
     ADD_EXECUTABLE(test-sigmoid operators/test_sigmoid_op.cpp test_include.h)
     target_link_libraries(test-sigmoid paddle-mobile)
diff --git a/test/net/test_genet_combine.cpp b/test/net/test_genet_combine.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e6b0505a670f1a58ed7d09cc4854ef52b05b0649
--- /dev/null
+++ b/test/net/test_genet_combine.cpp
@@ -0,0 +1,51 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <iostream>
+#include "../test_helper.h"
+#include "../test_include.h"
+
+int main() {
+  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+  paddle_mobile.SetThreadNum(4);
+  auto time1 = time();
+  if (paddle_mobile.Load(std::string(g_genet_combine) + "/model",
+                         std::string(g_genet_combine) + "/params", true)) {
+    auto time2 = time();
+    std::cout << "load cost :" << time_diff(time1, time2) << "ms" << std::endl;
+
+    std::vector<float> input;
+    std::vector<int64_t> dims{1, 3, 128, 128};
+    GetInput<float>(g_test_image_1x3x224x224_banana, &input, dims);
+
+    // warm up once
+    auto vec_result = paddle_mobile.Predict(input, dims);
+    std::vector<float>::iterator biggest =
+        std::max_element(std::begin(vec_result), std::end(vec_result));
+    std::cout << " Max element is " << *biggest << " at position "
+              << std::distance(std::begin(vec_result), biggest) << std::endl;
+
+    auto time3 = time();
+    for (int i = 0; i < 10; ++i) {
+      auto vec_result = paddle_mobile.Predict(input, dims);
+    }
+    auto time4 = time();
+    std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms"
+              << std::endl;
+  }
+  std::cout << "If the result is NaN, check whether "
+               "test/images/test_image_1x3x224x224_float exists."
+            << std::endl;
+  return 0;
+}
diff --git a/test/test_helper.h b/test/test_helper.h
index fef175951e834a176c7987a77d53f2b5b4eecc5b..69ffa58847f2395dec59d87abae4128d885dd19a 100644
--- a/test/test_helper.h
+++ b/test/test_helper.h
@@ -26,6 +26,7 @@ limitations under the License. */
 
 static const char *g_ocr = "../models/ocr";
 static const char *g_mobilenet_ssd = "../models/mobilenet+ssd";
+static const char *g_genet_combine = "../models/enet";
 static const char *g_mobilenet_ssd_gesture = "../models/mobilenet+ssd_gesture";
 static const char *g_mobilenet_combined = "../models/mobilenet_combine";
 static const char *g_mobilenet_detect = "../models/mobilenet-detect";
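As a quick wiring check, the new op_input_output_key entries can be queried the same way the loader resolves operator inputs and outputs. A minimal sketch, assuming the header is reachable as common/types.h and the binary links against paddle-mobile; the harness itself is illustrative, not part of the patch:

#include <iostream>
#include "common/types.h"

int main() {
  using namespace paddle_mobile;
  // Both new op types should resolve to the keys registered above.
  for (const char *type : {G_OP_TYPE_CONV_TRANSPOSE, G_OP_TYPE_PRELU}) {
    auto it = op_input_output_key.find(type);
    if (it == op_input_output_key.end()) {
      std::cout << type << " is not registered" << std::endl;
      continue;
    }
    std::cout << type << " inputs:";
    for (const auto &key : it->second.first) std::cout << " " << key;
    std::cout << " outputs:";
    for (const auto &key : it->second.second) std::cout << " " << key;
    std::cout << std::endl;
  }
  return 0;
}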