Commit 5737bb9c authored by qnqinan

Merge remote-tracking branch 'origin/develop' into develop

@@ -51,6 +51,8 @@ const char *G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU =
 const char *G_OP_TYPE_FUSION_FC_RELU = "fusion_fc_relu";
 const char *G_OP_TYPE_REGION = "region";
 const char *G_OP_TYPE_FUSION_CONV_BN = "fusion_conv_bn";
+const char *G_OP_TYPE_CONV_TRANSPOSE = "conv2d_transpose";
+const char *G_OP_TYPE_PRELU = "prelu";
 std::unordered_map<
     std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
@@ -58,6 +60,7 @@ std::unordered_map<
     {G_OP_TYPE_CONV, {{"Input"}, {"Output"}}},
     {G_OP_TYPE_FUSION_DWCONV_BN_RELU, {{"Input"}, {"Out"}}},
     {G_OP_TYPE_FUSION_CONV_BN_RELU, {{"Input"}, {"Out"}}},
+    {G_OP_TYPE_PRELU, {{"X", "Alpha"}, {"Out"}}},
     {G_OP_TYPE_FUSION_CONV_ADD, {{"Input"}, {"Out"}}},
     {G_OP_TYPE_RELU, {{"X"}, {"Out"}}},
     {G_OP_TYPE_SOFTMAX, {{"X"}, {"Out"}}},
@@ -87,6 +90,7 @@ std::unordered_map<
     {G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU, {{"X", "Y"}, {"Out"}}},
     {G_OP_TYPE_FUSION_FC_RELU, {{"X", "Y", "Z"}, {"Out"}}},
     {G_OP_TYPE_REGION, {{"X"}, {"Out"}}},
-    {G_OP_TYPE_FUSION_CONV_BN, {{"Input"}, {"Y"}}}};
+    {G_OP_TYPE_FUSION_CONV_BN, {{"Input"}, {"Y"}}},
+    {G_OP_TYPE_CONV_TRANSPOSE, {{"Input"}, {"Output"}}}};
 }  // namespace paddle_mobile
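
The table above maps each op type to its input and output variable keys; the two new entries expose prelu's X/Alpha inputs and conv2d_transpose's Input/Output. A standalone sketch (illustrative, not part of the commit) of building and querying a registry of this shape:

// Illustrative sketch (not from the commit): a registry shaped like the
// map above, pairing an op type with its input/output variable keys.
#include <iostream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

int main() {
  std::unordered_map<
      std::string,
      std::pair<std::vector<std::string>, std::vector<std::string>>>
      op_io = {{"prelu", {{"X", "Alpha"}, {"Out"}}},
               {"conv2d_transpose", {{"Input"}, {"Output"}}}};
  const auto &io = op_io.at("prelu");
  for (const auto &key : io.first) {
    std::cout << "prelu input key: " << key << "\n";  // prints X, then Alpha
  }
  for (const auto &key : io.second) {
    std::cout << "prelu output key: " << key << "\n";  // prints Out
  }
  return 0;
}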
@@ -114,6 +114,8 @@ extern const char *G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU;
 extern const char *G_OP_TYPE_FUSION_FC_RELU;
 extern const char *G_OP_TYPE_REGION;
 extern const char *G_OP_TYPE_FUSION_CONV_BN;
+extern const char *G_OP_TYPE_CONV_TRANSPOSE;
+extern const char *G_OP_TYPE_PRELU;
 extern std::unordered_map<
     std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
...
@@ -20,4 +20,13 @@ namespace paddle_mobile {
 namespace operators {}
 }  // namespace paddle_mobile
+
+namespace ops = paddle_mobile::operators;
+#ifdef PADDLE_MOBILE_CPU
+REGISTER_OPERATOR_CPU(conv2d_transpose, ops::ConvOpTranspose);
+#endif
+#ifdef PADDLE_MOBILE_MALI_GPU
+#endif
+#ifdef PADDLE_MOBILE_FPGA
+#endif
 #endif
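
REGISTER_OPERATOR_CPU is paddle-mobile's own macro and its definition is not shown in this diff. As a hedged illustration only, registration macros like this commonly expand to a static registrar whose constructor inserts a factory into a global table; every name in the sketch below is hypothetical:

// Generic sketch of the static-registrar pattern; OpBase, OpRegistry,
// Registrar, and DEMO_REGISTER_OPERATOR_CPU are illustrative names, not
// the paddle-mobile implementation.
#include <functional>
#include <iostream>
#include <memory>
#include <string>
#include <unordered_map>

struct OpBase {
  virtual ~OpBase() = default;
};

using OpFactory = std::function<std::unique_ptr<OpBase>()>;

// Function-local static so the registry is initialized before first use.
std::unordered_map<std::string, OpFactory> &OpRegistry() {
  static std::unordered_map<std::string, OpFactory> registry;
  return registry;
}

struct Registrar {
  Registrar(const std::string &type, OpFactory factory) {
    OpRegistry()[type] = std::move(factory);
  }
};

#define DEMO_REGISTER_OPERATOR_CPU(op_type, OpClass)       \
  static Registrar demo_registrar_##op_type(#op_type, [] { \
    return std::unique_ptr<OpBase>(new OpClass());         \
  })

struct ConvTransposeOp : OpBase {};
DEMO_REGISTER_OPERATOR_CPU(conv2d_transpose, ConvTransposeOp);

int main() {
  // The static registrar runs before main(), so the lookup succeeds.
  std::cout << OpRegistry().count("conv2d_transpose") << std::endl;  // 1
  return 0;
}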
@@ -88,4 +88,14 @@ class ConvOpTranspose : public framework::OperatorWithKernel<
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#ifdef PADDLE_MOBILE_CPU
+USE_OP_CPU(conv2d_transpose);
+#endif
+#ifdef PADDLE_MOBILE_MALI_GPU
+USE_OP_MALI_GPU(conv2d_transpose);
+#endif
+#ifdef PADDLE_MOBILE_FPGA
+USE_OP_FPGA(conv2d_transpose);
+#endif
 #endif
@@ -33,77 +33,34 @@ struct PReluFunctor {
  * */
 template <>
 void PReluKernel<CPU, float>::Compute(const PReluParam &param) const {
-  const auto *input_x = param.InputX();
-  auto *input_x_ptr = input_x->data<float>();
-  auto *out = param.Out();
-  auto *out_ptr = out->mutable_data<float>();
-
-  if (param.Slopes().size() == 1) {
-    PReluFunctor<float> func_(param.Slopes()[0]);
-    math::Transform trans;
-    trans(input_x_ptr, input_x_ptr + input_x->numel(), out_ptr, func_);
-  } else if (param.Slopes().size() > 1) {
-    const int dim_size = input_x->dims().size();
-    switch (dim_size) {
-      case 0:
-        break;
-      case 1: {
-        const int input_width = input_x->dims()[0];
-        math::Transform trans;
-#pragma omp parallel for
-        for (int w = 0; w < input_width; ++w) {
-          out_ptr[w] = input_x_ptr[w] * param.Slopes()[w];
-        }
-      } break;
-      case 2: {
-        const int input_height = input_x->dims()[0];
-        const int input_width = input_x->dims()[1];
-        math::Transform trans;
-#pragma omp parallel for
-        for (int h = 0; h < input_height; ++h) {
-          PReluFunctor<float> func_(param.Slopes()[h]);
-          const float *ptr = input_x_ptr + h * input_width;
-          float *optr = out_ptr + h * input_width;
-          trans(ptr, ptr + input_width, optr, func_);
-        }
-      } break;
-      case 3: {
-        const int chan_size = input_x->dims()[0];
-        const int input_height = input_x->dims()[1];
-        const int input_width = input_x->dims()[2];
-        math::Transform trans;
-#pragma omp parallel for
-        for (int c = 0; c < chan_size; ++c) {
-          PReluFunctor<float> func_(param.Slopes()[c]);
-          int size = input_height * input_width;
-          const float *ptr = input_x_ptr + c * size;
-          float *optr = out_ptr + c * size;
-          trans(ptr, ptr + size, optr, func_);
-        }
-      } break;
-      case 4:
-      default: {
-        const int batch_size = input_x->dims()[0];
-        const int chan_size = input_x->dims()[1];
-        const int input_height = input_x->dims()[2];
-        const int input_width = input_x->dims()[3];
-        math::Transform trans;
-#pragma omp parallel for
-        for (int b = 0; b < batch_size; ++b) {
-          for (int c = 0; c < chan_size; ++c) {
-            PReluFunctor<float> func_(param.Slopes()[c]);
-            int size = input_height * input_width;
-            const float *ptr = input_x_ptr + b * c * size;
-            float *optr = out_ptr + b * c * size;
-            trans(ptr, ptr + size, optr, func_);
-          }
-        }
-      }  // case 3,default
-        break;
-    }
-  }
-}
+  auto *x = param.InputX();
+  auto *alpha = param.InputAlpha();
+  auto *out = param.Out();
+  std::string mode = param.Mode();
+  const auto *x_ptr = x->data<float>();
+  auto *o_ptr = out->mutable_data<float>();
+  const auto *alpha_ptr = alpha->data<float>();
+  int numel = x->numel();
+  auto dim = x->dims();
+  int index = 0;
+  int i = 0;
+  int temp = 0;
+  if (mode == "channel") {
+    temp = numel / (dim[0] * dim[1]);
+#pragma omp parallel for
+    for (i = 0; i < numel; i++) {
+      index = (i / temp) % dim[1];
+      o_ptr[i] = x_ptr[i] > 0 ? x_ptr[i] : alpha_ptr[index] * x_ptr[i];
+    }
+  } else if (mode == "element") {
+#pragma omp parallel for
+    for (i = 0; i < numel; i++) {
+      o_ptr[i] = x_ptr[i] > 0 ? x_ptr[i] : alpha_ptr[i] * x_ptr[i];
+    }
+  } else {
+#pragma omp parallel for
+    for (i = 0; i < numel; i++) {
+      o_ptr[i] = x_ptr[i] > 0 ? x_ptr[i] : alpha_ptr[0] * x_ptr[i];
+    }
+  }
+}
...
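
In the new kernel, "channel" mode sets temp = numel / (dim[0] * dim[1]), which is H * W for an NCHW input, so index = (i / temp) % dim[1] recovers each element's channel and alpha_ptr[index] supplies the per-channel slope (PReLU: f(x) = x for x > 0, otherwise a * x). A standalone sketch (assuming NCHW layout, which the use of dim[1] as the channel count implies) that checks this index arithmetic:

// Standalone check of the "channel"-mode indexing above, assuming NCHW.
#include <cassert>

int main() {
  const int N = 2, C = 3, H = 4, W = 5;
  const int numel = N * C * H * W;
  const int temp = numel / (N * C);  // = H * W, as in the kernel
  for (int n = 0; n < N; ++n)
    for (int c = 0; c < C; ++c)
      for (int h = 0; h < H; ++h)
        for (int w = 0; w < W; ++w) {
          int i = ((n * C + c) * H + h) * W + w;  // flattened NCHW index
          int index = (i / temp) % C;
          assert(index == c);  // alpha_ptr[index] is the slope of channel c
        }
  return 0;
}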
@@ -39,6 +39,11 @@ using std::vector;
 class OpParam {
  protected:
+  template <typename T>
+  static T *InputAlphaFrom(const VariableNameMap &inputs, const Scope &scope) {
+    return GetVarValue<T>("Alpha", inputs, scope);
+  }
+
   template <typename T>
   static T *InputFrom(const VariableNameMap &inputs, const Scope &scope) {
     return GetVarValue<T>("Input", inputs, scope);
@@ -895,19 +900,24 @@ class PReluParam : public OpParam {
  public:
   PReluParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
              const AttributeMap &attrs, const Scope &scope) {
+    DLOG << "PReluParam inputs before";
     input_x_ = InputXFrom<LoDTensor>(inputs, scope);
+    alpha_ = InputAlphaFrom<LoDTensor>(inputs, scope);
+    framework::DDim dims = alpha_->dims();
     out_ = OutFrom<LoDTensor>(outputs, scope);
-    slopes_ = GetAttr<vector<float>>("slopes", attrs);
+    mode_ = GetAttr<std::string>("mode", attrs);
+    DLOG << "PReluParam mode after" << mode_;
   }
   const Tensor *InputX() const { return input_x_; }
+  const Tensor *InputAlpha() const { return alpha_; }
   Tensor *Out() const { return out_; }
-  const vector<float> &Slopes() const { return slopes_; }
+  const std::string &Mode() const { return mode_; }

  private:
   Tensor *input_x_;
   Tensor *out_;
-  vector<float> slopes_;
+  Tensor *alpha_;
+  std::string mode_;
 };
 #endif
...
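
PReluParam now carries an Alpha tensor and a string mode in place of the old slopes vector. The Alpha size each mode expects is implied by how the kernel indexes alpha_ptr; a small sketch making that explicit (inferred from the kernel, not stated in the commit):

// Expected Alpha element count per mode, inferred from the kernel's use of
// alpha_ptr[index] ("channel"), alpha_ptr[i] ("element"), and alpha_ptr[0]
// (any other mode). ExpectedAlphaNumel is an illustrative helper name.
#include <cassert>
#include <cstdint>
#include <string>

int64_t ExpectedAlphaNumel(const std::string &mode, int64_t n, int64_t c,
                           int64_t h, int64_t w) {
  if (mode == "channel") return c;              // one slope per channel
  if (mode == "element") return n * c * h * w;  // one slope per element
  return 1;                                     // single shared slope
}

int main() {
  assert(ExpectedAlphaNumel("channel", 1, 3, 128, 128) == 3);
  assert(ExpectedAlphaNumel("element", 1, 3, 128, 128) == 3 * 128 * 128);
  assert(ExpectedAlphaNumel("all", 1, 3, 128, 128) == 1);
  return 0;
}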
@@ -159,6 +159,10 @@ else ()
     ADD_EXECUTABLE(test-mobilenet-combine net/test_mobilenet_combine.cpp test_helper.h test_include.h executor_for_test.h)
     target_link_libraries(test-mobilenet-combine paddle-mobile)
+    # gen test
+    ADD_EXECUTABLE(test-genet net/test_genet_combine.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-genet paddle-mobile)
+
     # gen test
     ADD_EXECUTABLE(test-sigmoid operators/test_sigmoid_op.cpp test_include.h)
     target_link_libraries(test-sigmoid paddle-mobile)
...
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <iostream>
#include "../test_helper.h"
#include "../test_include.h"
int main() {
paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
paddle_mobile.SetThreadNum(4);
auto time1 = time();
if (paddle_mobile.Load(std::string(g_genet_combine) + "/model",
std::string(g_genet_combine) + "/params", true)) {
auto time2 = time();
std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl;
std::vector<float> input;
std::vector<int64_t> dims{1, 3, 128, 128};
GetInput<float>(g_test_image_1x3x224x224_banana, &input, dims);
    // warm up once
auto vec_result = paddle_mobile.Predict(input, dims);
std::vector<float>::iterator biggest =
std::max_element(std::begin(vec_result), std::end(vec_result));
std::cout << " Max element is " << *biggest << " at position "
<< std::distance(std::begin(vec_result), biggest) << std::endl;
auto time3 = time();
for (int i = 0; i < 10; ++i) {
auto vec_result = paddle_mobile.Predict(input, dims);
}
auto time4 = time();
std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms"
<< std::endl;
}
  std::cout << "If the result is NaN, check whether "
               "test/images/test_image_1x3x224x224_float exists."
            << std::endl;
return 0;
}
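
The load-cost and predict-cost figures come from the test_helper time() and time_diff() utilities, whose implementation is not shown in this diff. A std::chrono stand-in (all names hypothetical) for the same average-over-10-runs measurement:

// std::chrono stand-in for the test's timing; work() substitutes for
// paddle_mobile.Predict(input, dims).
#include <chrono>
#include <iostream>

int main() {
  using clock = std::chrono::steady_clock;
  auto work = [] {
    double acc = 0;
    for (int i = 0; i < 1000000; ++i) acc = acc + i * 0.5;
    return acc;
  };
  const int runs = 10;
  auto t0 = clock::now();
  for (int i = 0; i < runs; ++i) work();
  auto t1 = clock::now();
  double ms = std::chrono::duration<double, std::milli>(t1 - t0).count();
  std::cout << "predict cost :" << ms / runs << "ms" << std::endl;
  return 0;
}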
@@ -26,6 +26,7 @@ limitations under the License. */
 static const char *g_ocr = "../models/ocr";
 static const char *g_mobilenet_ssd = "../models/mobilenet+ssd";
+static const char *g_genet_combine = "../models/enet";
 static const char *g_mobilenet_ssd_gesture = "../models/mobilenet+ssd_gesture";
 static const char *g_mobilenet_combined = "../models/mobilenet_combine";
 static const char *g_mobilenet_detect = "../models/mobilenet-detect";
...