imp fusion_conv_add_prelu and fusion_conv_add_add_prelu op

7421f560 · yangfei · a1a7b05b · 7421f560 · 7421f560 · 7421f560
12 changed files
--- a/src/operators/fusion_conv_add_add_prelu.cpp
+++ b/src/operators/fusion_conv_add_add_prelu.cpp
@@ -18,10 +18,10 @@ limitations under the License. */
 #include "operators/math/conv_func.h"
 namespace paddle_mobile {
-    namespace operators {
+namespace operators {
-        template <typename Dtype, typename T>
+template <typename Dtype, typename T>
-        void FusionConvAddAddPReluOp<Dtype, T>::InferShape() const {
+void FusionConvAddAddPReluOp<Dtype, T>::InferShape() const {
  auto in_dims = this->param_.Input()->dims();
  auto filter_dims = this->param_.Filter()->dims();
  const std::vector<int> &strides = this->param_.Strides();
@@ -42,9 +42,9 @@ namespace paddle_mobile {
  }
  framework::DDim ddim = framework::make_ddim(output_shape);
  this->param_.Output()->Resize(ddim);
-        }
+}
-    }  // namespace operators
+}  // namespace operators
 }  // namespace paddle_mobile
 namespace ops = paddle_mobile::operators;

--- a/src/operators/fusion_conv_add_add_prelu_op.h
+++ b/src/operators/fusion_conv_add_add_prelu_op.h
@@ -24,24 +24,24 @@ limitations under the License. */
 #include "operators/op_param.h"
 namespace paddle_mobile {
-    namespace operators {
+namespace operators {
-        class FusionConvAddAddPReluOpMatcher : public framework::FusionOpMatcher {
+class FusionConvAddAddPReluOpMatcher : public framework::FusionOpMatcher {
 public:
  FusionConvAddAddPReluOpMatcher() {
    node_ = framework::Node(G_OP_TYPE_CONV);
    node_ > std::make_shared<framework::Node>(G_OP_TYPE_ELEMENTWISE_ADD) >
-                std::make_shared<framework::Node>(G_OP_TYPE_ELEMENTWISE_ADD)
+        std::make_shared<framework::Node>(G_OP_TYPE_ELEMENTWISE_ADD) >
-                > std::make_shared<framework::Node>(G_OP_TYPE_PRELU);
+        std::make_shared<framework::Node>(G_OP_TYPE_PRELU);
  }
  void FolderNodes(
      framework::Node *node,
      std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
    node->Folder(node_.Depth(), Type(),
-                             {{G_OP_TYPE_ELEMENTWISE_ADD, {{"Y", "Y"}, {"Out", "addOut"},{"X", "addX"}}},
+                 {{G_OP_TYPE_ELEMENTWISE_ADD,
-                              {G_OP_TYPE_PRELU, {{"Alpha", "Alpha"}}}
+                   {{"Y", "Y"}, {"Out", "addOut"}, {"X", "addX"}}},
-                             },
+                  {G_OP_TYPE_PRELU, {{"Alpha", "Alpha"}}}},
                 removed_nodes);
  }
@@ -51,10 +51,11 @@ namespace paddle_mobile {
    DLOG << " conv add add prelu check add X ";
    return {{2, "Y"}, {2, "X"}};
  }
-        };
+};
-        template <typename DeviceType, typename T>
+template <typename DeviceType, typename T>
-        class FusionConvAddAddPReluOp : public framework::OperatorWithKernel<
+class FusionConvAddAddPReluOp
+    : public framework::OperatorWithKernel<
          DeviceType, FusionConvAddAddPReluParam<DeviceType>,
          operators::ConvAddAddPReluKernel<DeviceType, T>> {
 public:
@@ -64,21 +65,22 @@ namespace paddle_mobile {
                          std::shared_ptr<framework::Scope> scope)
      : framework::OperatorWithKernel<
            DeviceType, FusionConvAddAddPReluParam<DeviceType>,
-                    operators::ConvAddAddPReluKernel<DeviceType, T>>(type, inputs, outputs,
+            operators::ConvAddAddPReluKernel<DeviceType, T>>(
-                                                                  attrs, scope) {}
+            type, inputs, outputs, attrs, scope) {}
  using framework::OperatorWithKernel<
      DeviceType, FusionConvAddAddPReluParam<DeviceType>,
      operators::ConvAddAddPReluKernel<DeviceType, T>>::OperatorWithKernel;
  void InferShape() const override;
 protected:
-        };
+};
 #ifdef PADDLE_MOBILE_CPU
 #ifndef CONV_ADD_ADD_PRELU_REGISTER
 #define CONV_ADD_ADD_PRELU_REGISTER
-        static framework::FusionOpRegistrar fusion_conv_add_add_prelu_registrar(
+static framework::FusionOpRegistrar fusion_conv_add_add_prelu_registrar(
    new FusionConvAddAddPReluOpMatcher());
 #endif
@@ -87,7 +89,7 @@ namespace paddle_mobile {
 #endif
 #ifdef PADDLE_MOBILE_FPGA
-        #ifndef CONV_ADD_ADD_PRELU_REGISTER
+#ifndef CONV_ADD_ADD_PRELU_REGISTER
 #define CONV_ADD_ADD_PRELU_REGISTER
 static framework::FusionOpRegistrar fusion_conv_add_add_prelu_registrar(
    new FusionConvAddAddPReluOpMatcher());
@@ -95,7 +97,7 @@ static framework::FusionOpRegistrar fusion_conv_add_add_prelu_registrar(
 #endif
-    }  // namespace operators
+}  // namespace operators
 }  // namespace paddle_mobile
 #ifdef PADDLE_MOBILE_CPU

--- a/src/operators/fusion_conv_add_prelu_op.cpp
+++ b/src/operators/fusion_conv_add_prelu_op.cpp
@@ -18,10 +18,10 @@ limitations under the License. */
 #include "operators/math/conv_func.h"
 namespace paddle_mobile {
-    namespace operators {
+namespace operators {
-        template <typename Dtype, typename T>
+template <typename Dtype, typename T>
-        void FusionConvAddPReluOp<Dtype, T>::InferShape() const {
+void FusionConvAddPReluOp<Dtype, T>::InferShape() const {
  auto in_dims = this->param_.Input()->dims();
  auto filter_dims = this->param_.Filter()->dims();
  const std::vector<int> &strides = this->param_.Strides();
@@ -42,14 +42,14 @@ namespace paddle_mobile {
  }
  framework::DDim ddim = framework::make_ddim(output_shape);
  this->param_.Output()->Resize(ddim);
-        }
+}
-    }  // namespace operators
+}  // namespace operators
 }  // namespace paddle_mobile
 namespace ops = paddle_mobile::operators;
 #ifdef PADDLE_MOBILE_CPU
-REGISTER_OPERATOR_CPU(fusion_conv_add_prelu,ops::FusionConvAddPReluOp);
+REGISTER_OPERATOR_CPU(fusion_conv_add_prelu, ops::FusionConvAddPReluOp);
 #endif
 #ifdef PADDLE_MOBILE_MALI_GPU
 #endif

--- a/src/operators/fusion_conv_add_prelu_op.h
+++ b/src/operators/fusion_conv_add_prelu_op.h
@@ -24,9 +24,9 @@ limitations under the License. */
 #include "operators/op_param.h"
 namespace paddle_mobile {
-    namespace operators {
+namespace operators {
-        class FusionConvAddPReluOpMatcher : public framework::FusionOpMatcher {
+class FusionConvAddPReluOpMatcher : public framework::FusionOpMatcher {
 public:
  FusionConvAddPReluOpMatcher() {
    node_ = framework::Node(G_OP_TYPE_CONV);
@@ -43,14 +43,14 @@ namespace paddle_mobile {
                 },
                 removed_nodes);
  }
  std::string Type() { return G_OP_TYPE_FUSION_CONV_ADD_PRELU; }
-        };
+};
-        template <typename DeviceType, typename T>
+template <typename DeviceType, typename T>
-        class FusionConvAddPReluOp : public framework::OperatorWithKernel<
+class FusionConvAddPReluOp
+    : public framework::OperatorWithKernel<
          DeviceType, FusionConvAddPReluParam<DeviceType>,
          operators::ConvAddPReluKernel<DeviceType, T>> {
 public:
@@ -69,13 +69,13 @@ namespace paddle_mobile {
  void InferShape() const override;
 protected:
-        };
+};
 #ifdef PADDLE_MOBILE_CPU
 #ifndef CONV_ADD_PRELU_REGISTER
 #define CONV_ADD_PRELU_REGISTER
-        static framework::FusionOpRegistrar fusion_conv_add_prelu_registrar(
+static framework::FusionOpRegistrar fusion_conv_add_prelu_registrar(
    new FusionConvAddPReluOpMatcher());
 #endif
@@ -84,7 +84,7 @@ namespace paddle_mobile {
 #endif
 #ifdef PADDLE_MOBILE_FPGA
-        #ifndef CONV_ADD_PRELU_REGISTER
+#ifndef CONV_ADD_PRELU_REGISTER
 #define CONV_ADD_PRELU_REGISTER
 static framework::FusionOpRegistrar fusion_conv_add_prelu_registrar(
    new FusionConvAddPReluOpMatcher());
@@ -92,7 +92,7 @@ static framework::FusionOpRegistrar fusion_conv_add_prelu_registrar(
 #endif
-    }  // namespace operators
+}  // namespace operators
 }  // namespace paddle_mobile
 #ifdef PADDLE_MOBILE_CPU

--- a/src/operators/kernel/arm/conv_add_add_prelu_kernel.cpp
+++ b/src/operators/kernel/arm/conv_add_add_prelu_kernel.cpp
@@ -18,21 +18,22 @@ limitations under the License. */
 #include "operators/kernel/central-arm-func/conv_add_add_prelu_arm_func.h"
 namespace paddle_mobile {
-    namespace operators {
+namespace operators {
-        template <>
+template <>
-        bool ConvAddAddPReluKernel<CPU, float>::Init(FusionConvAddAddPReluParam<CPU> *param) {
+bool ConvAddAddPReluKernel<CPU, float>::Init(
+    FusionConvAddAddPReluParam<CPU> *param) {
  return true;
-        }
+}
-        template <>
+template <>
-        void ConvAddAddPReluKernel<CPU, float>::Compute(
+void ConvAddAddPReluKernel<CPU, float>::Compute(
    const FusionConvAddAddPReluParam<CPU> &param) const {
  ConvAddAddPReluCompute<float>(param);
-        }
+}
-        template class ConvAddAddPReluKernel<CPU, float>;
+template class ConvAddAddPReluKernel<CPU, float>;
-    }  // namespace operators
+}  // namespace operators
 }  // namespace paddle_mobile
 #endif
--- a/src/operators/kernel/arm/conv_add_prelu_kernel.cpp
+++ b/src/operators/kernel/arm/conv_add_prelu_kernel.cpp
@@ -18,21 +18,21 @@ limitations under the License. */
 #include "operators/kernel/central-arm-func/conv_add_prelu_arm_func.h"
 namespace paddle_mobile {
-    namespace operators {
+namespace operators {
-        template <>
+template <>
-        bool ConvAddPReluKernel<CPU, float>::Init(FusionConvAddPReluParam<CPU> *param) {
+bool ConvAddPReluKernel<CPU, float>::Init(FusionConvAddPReluParam<CPU> *param) {
  return true;
-        }
+}
-        template <>
+template <>
-        void ConvAddPReluKernel<CPU, float>::Compute(
+void ConvAddPReluKernel<CPU, float>::Compute(
    const FusionConvAddPReluParam<CPU> &param) const {
  ConvAddPReluCompute<float>(param);
-        }
+}
-        template class ConvAddPReluKernel<CPU, float>;
+template class ConvAddPReluKernel<CPU, float>;
-    }  // namespace operators
+}  // namespace operators
 }  // namespace paddle_mobile
 #endif
--- a/src/operators/kernel/central-arm-func/conv_add_add_prelu_arm_func.h
+++ b/src/operators/kernel/central-arm-func/conv_add_add_prelu_arm_func.h
@@ -23,10 +23,10 @@ limitations under the License. */
 #include "operators/op_param.h"
 namespace paddle_mobile {
-    namespace operators {
+namespace operators {
-        template <typename P>
+template <typename P>
-        void ConvAddAddPReluCompute(const FusionConvAddAddPReluParam<CPU> &param) {
+void ConvAddAddPReluCompute(const FusionConvAddAddPReluParam<CPU> &param) {
  const Tensor *input = param.Input();
  Tensor filter = *param.Filter();
  Tensor bias = *param.Bias();
@@ -91,7 +91,7 @@ namespace paddle_mobile {
  for (int i = 0; i < batch_size; i++) {
    Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape);
    Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape);
-                Tensor bias1_batch = bias1.Slice(i,i+1).Resize(output_matrix_shape);
+    Tensor bias1_batch = bias1.Slice(i, i + 1).Resize(output_matrix_shape);
    for (int g = 0; g < groups; g++) {
      Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step);
@@ -115,23 +115,26 @@ namespace paddle_mobile {
      Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
      Tensor bias1_slice = bias1_batch.Slice(g * out_step, (g + 1) * out_step);
      float *biase_data1 = bias1_slice.data<float>();
-//                    int n = bias1_slice.dims()[0];
+      //                    int n = bias1_slice.dims()[0];
-//                    int m = bias1_slice.dims()[1];
+      //                    int m = bias1_slice.dims()[1];
-//                    for(int i=0;i<n*m;i++){
+      //                    for(int i=0;i<n*m;i++){
-//                        if(biase_data1[i]!=0)
+      //                        if(biase_data1[i]!=0)
-//                        DLOG<<biase_data1[i]<<",yangfei";
+      //                        DLOG<<biase_data1[i]<<",yangfei";
-//                    }
+      //                    }
-//                    math::matmul<float>(filter_slice, false, col_matrix, false,
+      //                    math::matmul<float>(filter_slice, false, col_matrix,
-//                                        static_cast<float>(1), &out_slice,
+      //                    false,
-//                                        static_cast<float>(1), true, biase_data);
+      //                                        static_cast<float>(1),
-                    math::matmulWithPRelu(filter_slice, false, col_matrix, false,
+      //                                        &out_slice,
-                                          &out_slice, p,mode, biase_data,biase_data1);
+      //                                        static_cast<float>(1), true,
-                }
+      //                                        biase_data);
+      math::matmulWithPRelu(filter_slice, false, col_matrix, false, &out_slice,
+                            p, mode, biase_data, biase_data1);
    }
  }
+}
-    }  // namespace operators
+}  // namespace operators
 }  // namespace paddle_mobile
 #endif
--- a/src/operators/kernel/central-arm-func/conv_add_prelu_arm_func.h
+++ b/src/operators/kernel/central-arm-func/conv_add_prelu_arm_func.h
@@ -23,15 +23,15 @@ limitations under the License. */
 #include "operators/op_param.h"
 namespace paddle_mobile {
-    namespace operators {
+namespace operators {
-        template <typename P>
+template <typename P>
-        void ConvAddPReluCompute(const FusionConvAddPReluParam<CPU> &param) {
+void ConvAddPReluCompute(const FusionConvAddPReluParam<CPU> &param) {
  const Tensor *input = param.Input();
  Tensor filter = *param.Filter();
  Tensor bias = *param.Bias();
-//            DLOG<<"yangfei";
+  //            DLOG<<"yangfei";
-//            DLOG<<bias.dims();
+  //            DLOG<<bias.dims();
  int axis = param.Axis();
  Tensor *output = param.Output();
  float *biase_data = bias.data<float>();
@@ -112,16 +112,19 @@ namespace paddle_mobile {
      // gemm
      Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step);
      Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
-//                    math::matmul<float>(filter_slice, false, col_matrix, false,
+      //                    math::matmul<float>(filter_slice, false, col_matrix,
-//                                        static_cast<float>(1), &out_slice,
+      //                    false,
-//                                        static_cast<float>(1), true, biase_data);
+      //                                        static_cast<float>(1),
-                    math::matmulWithPRelu(filter_slice, false, col_matrix, false,
+      //                                        &out_slice,
-                                         &out_slice, p,mode, biase_data, nullptr);
+      //                                        static_cast<float>(1), true,
-                }
+      //                                        biase_data);
+      math::matmulWithPRelu(filter_slice, false, col_matrix, false, &out_slice,
+                            p, mode, biase_data, nullptr);
    }
  }
+}
-    }  // namespace operators
+}  // namespace operators
 }  // namespace paddle_mobile
 #endif
--- a/src/operators/kernel/conv_add_add_prelu_kernel.h
+++ b/src/operators/kernel/conv_add_add_prelu_kernel.h
@@ -26,20 +26,20 @@ limitations under the License. */
 #include "operators/op_param.h"
 namespace paddle_mobile {
-    namespace operators {
+namespace operators {
-        using framework::DDim;
+using framework::DDim;
-        using framework::OpKernelBase;
+using framework::OpKernelBase;
-        template <typename DeviceType, typename T>
+template <typename DeviceType, typename T>
-        class ConvAddAddPReluKernel
+class ConvAddAddPReluKernel
    : public OpKernelBase<DeviceType, FusionConvAddAddPReluParam<DeviceType>> {
 public:
  void Compute(const FusionConvAddAddPReluParam<DeviceType> &param) const;
  bool Init(FusionConvAddAddPReluParam<DeviceType> *param);
-        };
+};
-    }  // namespace operators
+}  // namespace operators
 }  // namespace paddle_mobile
 #endif
--- a/src/operators/kernel/conv_add_prelu_kernel.h
+++ b/src/operators/kernel/conv_add_prelu_kernel.h
@@ -26,20 +26,20 @@ limitations under the License. */
 #include "operators/op_param.h"
 namespace paddle_mobile {
-    namespace operators {
+namespace operators {
-        using framework::DDim;
+using framework::DDim;
-        using framework::OpKernelBase;
+using framework::OpKernelBase;
-        template <typename DeviceType, typename T>
+template <typename DeviceType, typename T>
-        class ConvAddPReluKernel
+class ConvAddPReluKernel
    : public OpKernelBase<DeviceType, FusionConvAddPReluParam<DeviceType>> {
 public:
  void Compute(const FusionConvAddPReluParam<DeviceType> &param) const;
  bool Init(FusionConvAddPReluParam<DeviceType> *param);
-        };
+};
-    }  // namespace operators
+}  // namespace operators
 }  // namespace paddle_mobile
 #endif
--- a/src/operators/math/gemm.cpp
+++ b/src/operators/math/gemm.cpp
@@ -3172,7 +3172,7 @@ void SgemmWithPRelu_omp(int m, int n, int k, const float *A, int lda,
  int max_threads = 1;
 #endif
-  int L1 = 16 / max_threads * 1024;
+  int L1 = 32 * 1024;
  KC = k;
  if (m > n) {
    // 对 A 分块

--- a/src/operators/math/math_function.cpp
+++ b/src/operators/math/math_function.cpp
@@ -110,9 +110,8 @@ void matmulWithPRelu(const framework::Tensor &matrix_a, bool trans_a,
  int K = (!trans_a) ? dim_a[1] : dim_a[0];
 #ifdef _OPENMP
-  xsSgemmWithPRelu_omp(M, N, K, matrix_a.data<float>(), K,
+  SgemmWithPRelu_omp(M, N, K, matrix_a.data<float>(), K, matrix_b.data<float>(),
-                       matrix_b.data<float>(), N, matrix_out->data<float>(), N,
+                     N, matrix_out->data<float>(), N, p, mode, bias, bias1);
-                       p, mode, bias, bias1);
 #else
  SgemmWithPRelu(M, N, K, matrix_a.data<float>(), K, matrix_b.data<float>(), N,
                 matrix_out->data<float>(), N, p, mode, bias, bias1);