Commit cb9ef9fb authored by Ruilong Liu, committed by GitHub

Merge branch 'develop' into develop

......@@ -61,9 +61,5 @@ template class OperatorBase<CPU>;
template class OperatorBase<FPGA>;
template class OperatorBase<GPU_MALI>;
template class OperatorWithKernel<CPU>;
template class OperatorWithKernel<FPGA>;
template class OperatorWithKernel<GPU_MALI>;
} // namespace framework
} // namespace paddle_mobile
......@@ -103,16 +103,24 @@ class OperatorBase {
/*
* @b This class is the parent of every op that performs computation; such an op inherits from OperatorBase.
* */
template <typename Dtype>
template <typename Dtype, typename ParamType, typename KernelType>
class OperatorWithKernel : public OperatorBase<Dtype> {
public:
OperatorWithKernel(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const AttributeMap &attrs,
std::shared_ptr<Scope> scope)
: OperatorBase<Dtype>(type, inputs, outputs, attrs, scope) {}
: OperatorBase<Dtype>(type, inputs, outputs, attrs, scope),
param_(inputs, outputs, attrs, *scope) {
kernel_.Init(param_);
}
virtual void RunImpl() const { this->kernel_.Compute(this->param_); }
virtual void RunImpl() const = 0;
virtual void InferShape() const = 0;
protected:
KernelType kernel_;
ParamType param_;
};
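
With param_ and kernel_ now owned by the base class, a concrete operator only names its param and kernel types and implements InferShape. A minimal sketch of the new usage, with hypothetical FooOp / FooParam / FooKernel names (they do not exist in this tree):

// Sketch only; the Foo* names are illustrative, not part of this commit.
template <typename DeviceType, typename T>
class FooOp : public framework::OperatorWithKernel<
                  DeviceType, FooParam, FooKernel<DeviceType, T>> {
 public:
  // Inherit the base constructor: it builds param_ from the variable maps
  // and calls kernel_.Init(param_) once, so no per-op boilerplate remains.
  using framework::OperatorWithKernel<
      DeviceType, FooParam, FooKernel<DeviceType, T>>::OperatorWithKernel;
  void InferShape() const override;
};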
/*
......@@ -127,6 +135,7 @@ class OpKernelBase {
* All of these structs live in: paddle-mobile/src/operators/op_param.h
* */
virtual void Compute(const P &para) const = 0;
virtual bool Init(const P &para) const { return true; }
virtual ~OpKernelBase() = default;
};
......
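On the kernel side, Compute stays pure while Init is an optional hook whose default returns true; the new OperatorWithKernel constructor invokes it once after constructing the param. A minimal sketch, assuming OpKernelBase takes the device and param types as template parameters as elsewhere in this diff (FooKernel / FooParam are hypothetical):

template <typename DeviceType, typename T>
class FooKernel : public framework::OpKernelBase<DeviceType, FooParam> {
 public:
  bool Init(const FooParam &param) const override {
    return true;  // one-time setup, e.g. re-laying-out weights
  }
  void Compute(const FooParam &param) const override {
    // per-run work: read inputs and write outputs through param
  }
};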
......@@ -23,8 +23,8 @@ namespace operators {
template <typename Dtype, typename T>
void BatchNormOp<Dtype, T>::InferShape() const {
auto x_dims = param_.InputX()->dims();
param_.OutputY()->Resize(x_dims);
auto x_dims = this->param_.InputX()->dims();
this->param_.OutputY()->Resize(x_dims);
}
template class BatchNormOp<CPU, float>;
} // namespace operators
......
......@@ -25,26 +25,21 @@ namespace paddle_mobile {
namespace operators {
using std::string;
template <typename DeviceType, typename T>
class BatchNormOp : public framework::OperatorWithKernel<DeviceType> {
class BatchNormOp
: public framework::OperatorWithKernel<DeviceType, BatchNormParam,
BatchNormKernel<DeviceType, T>> {
public:
BatchNormOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs,
const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<DeviceType>(type, inputs, outputs, attrs,
scope),
param_(inputs, outputs, attrs, *scope) {}
: framework::OperatorWithKernel<DeviceType, BatchNormParam,
BatchNormKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {}
void RunImpl() const {
operators::BatchNormKernel<DeviceType, T> kernel;
kernel.Compute(param_);
}
using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel;
void InferShape() const override;
protected:
BatchNormParam param_;
};
} // namespace operators
......
......@@ -21,11 +21,11 @@ namespace operators {
template <typename Dtype, typename T>
void BoxCoderOp<Dtype, T>::InferShape() const {
auto input_priorbox_dims = param_.InputPriorBox()->dims();
auto input_priorboxvar_dims = param_.InputPriorBoxVar()->dims();
auto input_targetbox_dims = param_.InputTargetBox()->dims();
auto input_priorbox_dims = this->param_.InputPriorBox()->dims();
auto input_priorboxvar_dims = this->param_.InputPriorBoxVar()->dims();
auto input_targetbox_dims = this->param_.InputTargetBox()->dims();
auto code_type = param_.CodeType();
auto code_type = this->param_.CodeType();
if (code_type == "encode_center_size") {
if (input_targetbox_dims.size() != 2) {
......@@ -44,7 +44,7 @@ void BoxCoderOp<Dtype, T>::InferShape() const {
LOG(kLOG_ERROR) << " dimension not match";
}
}
param_.OutputBox()->Resize(framework::make_ddim(
this->param_.OutputBox()->Resize(framework::make_ddim(
{input_targetbox_dims[0], input_priorbox_dims[0], 4}));
}
template class BoxCoderOp<CPU, float>;
......
......@@ -28,26 +28,24 @@ namespace operators {
using paddle_mobile::framework::Tensor;
template <typename DeviceType, typename T>
class BoxCoderOp : public framework::OperatorWithKernel<DeviceType> {
class BoxCoderOp
: public framework::OperatorWithKernel<
DeviceType, BoxCoderParam, operators::BoxCoderKernel<DeviceType, T>> {
public:
BoxCoderOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs,
const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<DeviceType>(type, inputs, outputs, attrs,
scope),
param_(inputs, outputs, attrs, *scope) {}
: framework::OperatorWithKernel<DeviceType, BoxCoderParam,
operators::BoxCoderKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {}
void RunImpl() const {
operators::BoxCoderKernel<DeviceType, T> kernel;
kernel.Compute(param_);
}
using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel;
using framework::OperatorWithKernel<
DeviceType, BoxCoderParam,
operators::BoxCoderKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override;
protected:
BoxCoderParam param_;
};
} // namespace operators
......
......@@ -21,7 +21,7 @@ namespace operators {
template <typename Dtype, typename T>
void ConcatOp<Dtype, T>::InferShape() const {
auto inputs = param_.Inputs();
auto inputs = this->param_.Inputs();
const size_t n = inputs.size();
std::vector<DDim> inputs_dims;
......@@ -30,7 +30,7 @@ void ConcatOp<Dtype, T>::InferShape() const {
inputs_dims.push_back(inputs[i]->dims());
}
auto axis = static_cast<size_t>(param_.Axis());
auto axis = static_cast<size_t>(this->param_.Axis());
if (n == 1) {
DLOG << "Warning: concat op have only one input, "
......@@ -54,7 +54,7 @@ void ConcatOp<Dtype, T>::InferShape() const {
out_dims[axis] = -1;
}
param_.Out()->Resize(out_dims);
this->param_.Out()->Resize(out_dims);
}
template class ConcatOp<CPU, float>;
......
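For reference, the shape rule applied above: every input must match on all dimensions except the concat axis; the output copies those dimensions and sums the extents along the axis, with -1 propagated when any extent is unknown. A sketch with plain vectors (hypothetical helper, not from the tree):

#include <cstdint>
#include <vector>

std::vector<int64_t> ConcatShapeSketch(
    const std::vector<std::vector<int64_t>> &dims, size_t axis) {
  std::vector<int64_t> out = dims[0];
  for (size_t i = 1; i < dims.size(); ++i) {
    if (out[axis] < 0 || dims[i][axis] < 0) {
      out[axis] = -1;  // any unknown extent makes the output extent unknown
    } else {
      out[axis] += dims[i][axis];  // non-axis extents must already match
    }
  }
  return out;
}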
......@@ -24,25 +24,23 @@ namespace paddle_mobile {
namespace operators {
using std::string;
template <typename DeviceType, typename T>
class ConcatOp : public framework::OperatorWithKernel<DeviceType> {
class ConcatOp
: public framework::OperatorWithKernel<
DeviceType, ConcatParam, operators::ConcatKernel<DeviceType, T>> {
public:
ConcatOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<DeviceType>(type, inputs, outputs, attrs,
scope),
param_(inputs, outputs, attrs, *scope) {}
: framework::OperatorWithKernel<DeviceType, ConcatParam,
operators::ConcatKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {}
void RunImpl() const {
operators::ConcatKernel<DeviceType, T> kernel;
kernel.Compute(param_);
}
using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel;
using framework::OperatorWithKernel<
DeviceType, ConcatParam,
operators::ConcatKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override;
protected:
ConcatParam param_;
};
} // namespace operators
......
......@@ -24,12 +24,12 @@ namespace operators {
template <typename Dtype, typename T>
void ConvOp<Dtype, T>::InferShape() const {
auto in_dims = param_.Input()->dims();
auto filter_dims = param_.Filter()->dims();
const std::vector<int> &strides = param_.Strides();
std::vector<int> paddings = param_.Paddings();
int groups = param_.Groups();
std::vector<int> dilations = param_.Dilations();
auto in_dims = this->param_.Input()->dims();
auto filter_dims = this->param_.Filter()->dims();
const std::vector<int> &strides = this->param_.Strides();
std::vector<int> paddings = this->param_.Paddings();
int groups = this->param_.Groups();
std::vector<int> dilations = this->param_.Dilations();
PADDLE_MOBILE_ENFORCE((in_dims.size() == filter_dims.size() &&
dilations.size() == paddings.size() &&
......@@ -44,7 +44,7 @@ void ConvOp<Dtype, T>::InferShape() const {
}
framework::DDim ddim = framework::make_ddim(output_shape);
param_.Output()->Resize(ddim);
this->param_.Output()->Resize(ddim);
}
template class ConvOp<CPU, float>;
......
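For reference, the arithmetic behind the output_shape loop above; ConvOutputSize (declared further down in conv_op.h) follows the standard convolution rule, sketched here:

inline int ConvOutputSizeSketch(int input_size, int filter_size, int dilation,
                                int padding, int stride) {
  const int dkernel = dilation * (filter_size - 1) + 1;  // effective filter
  return (input_size + 2 * padding - dkernel) / stride + 1;
}
// Example: input 224, filter 3, dilation 1, padding 1, stride 2 -> 112.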
......@@ -24,26 +24,23 @@ namespace paddle_mobile {
namespace operators {
using std::string;
template <typename DeviceType, typename T>
class ConvOp : public framework::OperatorWithKernel<DeviceType> {
class ConvOp
: public framework::OperatorWithKernel<
DeviceType, ConvParam, operators::ConvKernel<DeviceType, T>> {
public:
ConvOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<DeviceType>(type, inputs, outputs, attrs,
scope),
param_(inputs, outputs, attrs, *scope) {}
: framework::OperatorWithKernel<DeviceType, ConvParam,
operators::ConvKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {}
using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel;
using framework::OperatorWithKernel<
DeviceType, ConvParam,
operators::ConvKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override;
void RunImpl() const {
operators::ConvKernel<DeviceType, T> kernel;
kernel.Compute(param_);
this->ClearVariables({"Filter", "Input"});
}
private:
ConvParam param_;
};
inline int ConvOutputSize(int input_size, int filter_size, int dilation,
......
......@@ -25,12 +25,12 @@ namespace operators {
template <typename Dtype, typename T>
void DepthwiseConvOp<Dtype, T>::InferShape() const {
auto in_dims = param_.Input()->dims();
auto filter_dims = param_.Filter()->dims();
const std::vector<int> &strides = param_.Strides();
std::vector<int> paddings = param_.Paddings();
int groups = param_.Groups();
std::vector<int> dilations = param_.Dilations();
auto in_dims = this->param_.Input()->dims();
auto filter_dims = this->param_.Filter()->dims();
const std::vector<int> &strides = this->param_.Strides();
std::vector<int> paddings = this->param_.Paddings();
int groups = this->param_.Groups();
std::vector<int> dilations = this->param_.Dilations();
PADDLE_MOBILE_ENFORCE((in_dims.size() == filter_dims.size() &&
dilations.size() == paddings.size() &&
......@@ -45,7 +45,7 @@ void DepthwiseConvOp<Dtype, T>::InferShape() const {
}
framework::DDim ddim = framework::make_ddim(output_shape);
param_.Output()->Resize(ddim);
this->param_.Output()->Resize(ddim);
}
template class DepthwiseConvOp<CPU, float>;
......
......@@ -24,27 +24,25 @@ namespace paddle_mobile {
namespace operators {
template <typename DeviceType, typename T>
class DepthwiseConvOp : public framework::OperatorWithKernel<DeviceType> {
class DepthwiseConvOp : public framework::OperatorWithKernel<
DeviceType, ConvParam,
operators::DepthwiseConvKernel<DeviceType, T>> {
public:
DepthwiseConvOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs,
const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<DeviceType>(type, inputs, outputs, attrs,
scope),
param_(inputs, outputs, attrs, *scope) {}
using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel;
: framework::OperatorWithKernel<
DeviceType, ConvParam,
operators::DepthwiseConvKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, ConvParam,
operators::DepthwiseConvKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override;
void RunImpl() const {
operators::DepthwiseConvKernel<DeviceType, T> kernel;
kernel.Compute(param_);
this->ClearVariables({"Filter", "Input"});
}
private:
ConvParam param_;
};
} // namespace operators
......
......@@ -21,8 +21,8 @@ namespace operators {
template <typename Dtype, typename T>
void ElementwiseAddOp<Dtype, T>::InferShape() const {
auto x_dim = param_.InputX()->dims();
param_.Out()->Resize(x_dim);
auto x_dim = this->param_.InputX()->dims();
this->param_.Out()->Resize(x_dim);
}
template class ElementwiseAddOp<CPU, float>;
} // namespace operators
......
......@@ -25,26 +25,25 @@ namespace paddle_mobile {
namespace operators {
using std::string;
template <typename DeviceType, typename T>
class ElementwiseAddOp : public framework::OperatorWithKernel<DeviceType> {
class ElementwiseAddOp : public framework::OperatorWithKernel<
DeviceType, ElementwiseAddParam,
operators::ElementwiseAddKernel<DeviceType, T>> {
public:
ElementwiseAddOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs,
const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<DeviceType>(type, inputs, outputs, attrs,
scope),
param_(inputs, outputs, attrs, *scope) {}
void RunImpl() const {
operators::ElementwiseAddKernel<DeviceType, T> kernel;
kernel.Compute(param_);
}
using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel;
: framework::OperatorWithKernel<
DeviceType, ElementwiseAddParam,
operators::ElementwiseAddKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, ElementwiseAddParam,
operators::ElementwiseAddKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override;
protected:
ElementwiseAddParam param_;
};
} // namespace operators
} // namespace paddle_mobile
......
......@@ -21,12 +21,12 @@ namespace operators {
template <typename Dtype, typename T>
void FushionConvAddOp<Dtype, T>::InferShape() const {
auto in_dims = param_.Input()->dims();
auto filter_dims = param_.Filter()->dims();
const std::vector<int> &strides = param_.Strides();
std::vector<int> paddings = param_.Paddings();
int groups = param_.Groups();
std::vector<int> dilations = param_.Dilations();
auto in_dims = this->param_.Input()->dims();
auto filter_dims = this->param_.Filter()->dims();
const std::vector<int> &strides = this->param_.Strides();
std::vector<int> paddings = this->param_.Paddings();
int groups = this->param_.Groups();
std::vector<int> dilations = this->param_.Dilations();
PADDLE_MOBILE_ENFORCE((in_dims.size() == filter_dims.size() &&
dilations.size() == paddings.size() &&
......@@ -41,7 +41,7 @@ void FushionConvAddOp<Dtype, T>::InferShape() const {
}
framework::DDim ddim = framework::make_ddim(output_shape);
param_.Output()->Resize(ddim);
this->param_.Output()->Resize(ddim);
}
template class FushionConvAddOp<CPU, float>;
} // namespace operators
......
......@@ -47,27 +47,24 @@ class FusionConvAddMatcher : public framework::FusionOpMatcher {
};
template <typename DeviceType, typename T>
class FushionConvAddOp : public framework::OperatorWithKernel<DeviceType> {
class FushionConvAddOp : public framework::OperatorWithKernel<
DeviceType, FushionConvAddParam,
operators::ConvAddKernel<DeviceType, T>> {
public:
FushionConvAddOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs,
const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<DeviceType>(type, inputs, outputs, attrs,
scope),
param_(inputs, outputs, attrs, *scope) {}
: framework::OperatorWithKernel<DeviceType, FushionConvAddParam,
operators::ConvAddKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {}
void RunImpl() const {
operators::ConvAddKernel<DeviceType, T> kernel;
kernel.Compute(param_);
this->ClearVariables({"Filter", "Input", "Y"});
}
using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel;
using framework::OperatorWithKernel<
DeviceType, FushionConvAddParam,
operators::ConvAddKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override;
protected:
FushionConvAddParam param_;
};
inline int ConvOutputSize(int input_size, int filter_size, int dilation,
......
......@@ -20,10 +20,10 @@ namespace operators {
template <typename Dtype, typename T>
void FushionFcOp<Dtype, T>::InferShape() const {
auto x_dims = param_.InputX()->dims();
auto y_dims = param_.InputY()->dims();
int x_num_col_dims = param_.XNumColDims();
int y_num_col_dims = param_.YNumColDims();
auto x_dims = this->param_.InputX()->dims();
auto y_dims = this->param_.InputY()->dims();
int x_num_col_dims = this->param_.XNumColDims();
int y_num_col_dims = this->param_.YNumColDims();
assert(x_dims.size() > x_num_col_dims);
assert(y_dims.size() > y_num_col_dims);
......@@ -47,7 +47,7 @@ void FushionFcOp<Dtype, T>::InferShape() const {
}
framework::DDim ddim = framework::make_ddim(output_dims);
param_.Out()->Resize(ddim);
this->param_.Out()->Resize(ddim);
}
template class FushionFcOp<CPU, float>;
} // namespace operators
......
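The x_num_col_dims / y_num_col_dims convention asserted above, for reference: a tensor is viewed as a 2-D matrix before the matmul, folding the first num_col_dims dimensions into rows and the rest into columns. A worked sketch (hypothetical helper):

#include <cstdint>
#include <utility>
#include <vector>

std::pair<int64_t, int64_t> FlattenTo2DSketch(
    const std::vector<int64_t> &dims, int num_col_dims) {
  int64_t rows = 1, cols = 1;
  for (int i = 0; i < num_col_dims; ++i) rows *= dims[i];
  for (size_t i = num_col_dims; i < dims.size(); ++i) cols *= dims[i];
  return {rows, cols};
}
// Example: dims {2, 3, 4, 5} with num_col_dims = 2 -> rows 6, cols 20.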
......@@ -45,26 +45,25 @@ class FusionFcMatcher : public framework::FusionOpMatcher {
};
template <typename DeviceType, typename T>
class FushionFcOp : public framework::OperatorWithKernel<DeviceType> {
class FushionFcOp : public framework::OperatorWithKernel<
DeviceType, FushionFcParam,
operators::FushionFcKernel<DeviceType, T>> {
public:
FushionFcOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs,
const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<DeviceType>(type, inputs, outputs, attrs,
scope),
param_(inputs, outputs, attrs, *scope) {}
void RunImpl() const {
operators::FushionFcKernel<DeviceType, T> kernel;
kernel.Compute(param_);
}
using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel;
: framework::OperatorWithKernel<
DeviceType, FushionFcParam,
operators::FushionFcKernel<DeviceType, T>>(type, inputs, outputs,
attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, FushionFcParam,
operators::FushionFcKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override;
protected:
FushionFcParam param_;
};
#ifdef PADDLE_MOBILE_CPU
......
......@@ -61,19 +61,20 @@ void BatchNormKernel<CPU, float>::Compute(const BatchNormParam &param) const {
/// std = (var + epsilon).sqrt();
/// inv_std = 1 / std;
for (int i = 0; i < C * 4; i += 4) {
int index = i / 4;
inv_std_ptr[i] =
1 / static_cast<float>(pow((variance_ptr[i / 4] + epsilon), 0.5));
1 / static_cast<float>(pow((variance_ptr[index] + epsilon), 0.5));
inv_std_ptr[i + 1] = inv_std_ptr[i];
inv_std_ptr[i + 2] = inv_std_ptr[i];
inv_std_ptr[i + 3] = inv_std_ptr[i];
new_scale_ptr[i] = inv_std_ptr[i] * scale_ptr[i / 4];
new_scale_ptr[i] = inv_std_ptr[i] * scale_ptr[index];
new_scale_ptr[i + 1] = new_scale_ptr[i];
new_scale_ptr[i + 2] = new_scale_ptr[i];
new_scale_ptr[i + 3] = new_scale_ptr[i];
new_bias_ptr[i] =
bias_ptr[i / 4] - mean_ptr[i / 4] * inv_std_ptr[i] * scale_ptr[i / 4];
bias_ptr[index] - mean_ptr[index] * inv_std_ptr[i] * scale_ptr[index];
new_bias_ptr[i + 1] = new_bias_ptr[i];
new_bias_ptr[i + 2] = new_bias_ptr[i];
......@@ -164,21 +165,21 @@ void BatchNormKernel<CPU, float>::Compute(const BatchNormParam &param) const {
"vadd.f32 q7, q7, q10 \n\t"
"vadd.f32 q8, q8, q10 \n\t"
"add %[out_ptr], %[out_ptr], r6 \n\t"
"add %[out_ptr], %[out_ptr], r6 \n\t"
"vst1.32 {q1, q2}, [%[out_ptr]]! \n\t"
"vst1.32 {q3, q4}, [%[out_ptr]]! \n\t"
"vst1.32 {q5, q6}, [%[out_ptr]]! \n\t"
"vst1.32 {q7, q8}, [%[out_ptr]]! \n\t"
"vst1.32 {q3, q4}, [%[out_ptr]]! \n\t"
"vst1.32 {q5, q6}, [%[out_ptr]]! \n\t"
"vst1.32 {q7, q8}, [%[out_ptr]]! \n\t"
"end_remainder_%=: \n\t"
"end_remainder_%=: \n\t"
"subs %[C], %[C], #1 \n\t"
"bge loop_c_%= \n\t"
"end_c_%=: \n\t"
"subs %[N], %[N], #1 \n\t"
"bge loop_n_%= \n\t"
"end_n_%=: \n\t"
"subs %[N], %[N], #1 \n\t"
"bge loop_n_%= \n\t"
"end_n_%=: \n\t"
:
: [input_x_ptr] "r"(input_x_ptr), [out_ptr] "r"(out_ptr),
[new_scale_ptr] "r"(new_scale_ptr), [new_bias_ptr] "r"(new_bias_ptr),
......@@ -232,6 +233,7 @@ void BatchNormKernel<CPU, float>::Compute(const BatchNormParam &param) const {
// DLOG << "out_ptr : " << out_ptr[102];
}
}
} // namespace operators
} // namespace paddle_mobile
......
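A scalar reference for the folding in the loop above: y = scale * (x - mean) / sqrt(var + eps) + bias is rewritten as y = new_scale * x + new_bias, and the kernel stores each channel's value four times so the NEON lanes can load it directly. One value per channel in this sketch:

#include <cmath>

void FoldBatchNormParamsSketch(const float *scale, const float *bias,
                               const float *mean, const float *variance,
                               float epsilon, int C, float *new_scale,
                               float *new_bias) {
  for (int c = 0; c < C; ++c) {
    const float inv_std = 1.f / std::sqrt(variance[c] + epsilon);
    new_scale[c] = scale[c] * inv_std;
    new_bias[c] = bias[c] - mean[c] * scale[c] * inv_std;
  }
}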
......@@ -20,7 +20,10 @@ namespace paddle_mobile {
namespace operators {
template <>
void ConvKernel<GPU_MALI, float>::Compute(const ConvParam &param) const {}
void ConvKernel<GPU_MALI, float>::Compute(const ConvParam &param) const {
// ArmConvImplement imp;
// imp.Compute(param);
}
template class ConvKernel<GPU_MALI, float>;
} // namespace operators
......
......@@ -21,8 +21,8 @@ namespace operators {
template <typename Dtype, typename T>
void LrnOp<Dtype, T>::InferShape() const {
auto x_dims = param_.InputX()->dims();
param_.Out()->Resize(x_dims);
auto x_dims = this->param_.InputX()->dims();
this->param_.Out()->Resize(x_dims);
}
template class LrnOp<CPU, float>;
} // namespace operators
......
......@@ -25,25 +25,22 @@ namespace paddle_mobile {
namespace operators {
using std::string;
template <typename DeviceType, typename T>
class LrnOp : public framework::OperatorWithKernel<DeviceType> {
class LrnOp : public framework::OperatorWithKernel<
DeviceType, LrnParam, operators::LrnKernel<DeviceType, T>> {
public:
LrnOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<DeviceType>(type, inputs, outputs, attrs,
scope),
param_(inputs, outputs, attrs, *scope) {}
: framework::OperatorWithKernel<DeviceType, LrnParam,
operators::LrnKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {}
void RunImpl() const {
operators::LrnKernel<DeviceType, T> kernel;
kernel.Compute(param_);
}
using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel;
using framework::OperatorWithKernel<
DeviceType, LrnParam,
operators::LrnKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override;
protected:
LrnParam param_;
};
} // namespace operators
......
......@@ -21,10 +21,10 @@ namespace operators {
template <typename Dtype, typename T>
void MulOp<Dtype, T>::InferShape() const {
auto x_dims = param_.InputX()->dims();
auto y_dims = param_.InputY()->dims();
int x_num_col_dims = param_.XNumColDims();
int y_num_col_dims = param_.YNumColDims();
auto x_dims = this->param_.InputX()->dims();
auto y_dims = this->param_.InputY()->dims();
int x_num_col_dims = this->param_.XNumColDims();
int y_num_col_dims = this->param_.YNumColDims();
assert(x_dims.size() > x_num_col_dims);
assert(y_dims.size() > y_num_col_dims);
......@@ -48,7 +48,7 @@ void MulOp<Dtype, T>::InferShape() const {
}
framework::DDim ddim = framework::make_ddim(output_dims);
param_.Out()->Resize(ddim);
this->param_.Out()->Resize(ddim);
}
template class MulOp<CPU, float>;
} // namespace operators
......
......@@ -25,25 +25,22 @@ namespace paddle_mobile {
namespace operators {
template <typename DeviceType, typename T>
class MulOp : public framework::OperatorWithKernel<DeviceType> {
class MulOp : public framework::OperatorWithKernel<
DeviceType, MulParam, operators::MulKernel<DeviceType, T>> {
public:
MulOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<DeviceType>(type, inputs, outputs, attrs,
scope),
param_(inputs, outputs, attrs, *scope) {}
: framework::OperatorWithKernel<DeviceType, MulParam,
operators::MulKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {}
void RunImpl() const {
operators::MulKernel<DeviceType, T> kernel;
kernel.Compute(param_);
}
using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel;
using framework::OperatorWithKernel<
DeviceType, MulParam,
operators::MulKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override;
protected:
MulParam param_;
};
} // namespace operators
......
......@@ -20,8 +20,8 @@ namespace operators {
template <typename Dtype, typename T>
void MultiClassNMSOp<Dtype, T>::InferShape() const {
auto input_bboxes_dims = param_.InputBBoxes()->dims();
auto input_scores_dims = param_.InputScores()->dims();
auto input_bboxes_dims = this->param_.InputBBoxes()->dims();
auto input_scores_dims = this->param_.InputScores()->dims();
if (input_scores_dims.size() != 3) {
LOG(kLOG_ERROR) << "Input Scores size must be 3";
}
......@@ -32,7 +32,7 @@ void MultiClassNMSOp<Dtype, T>::InferShape() const {
LOG(kLOG_ERROR) << "Predict bboxes must be equal";
}
// pre size, will change in Compute.
param_.Out()->Resize(framework::make_ddim({input_bboxes_dims[1], 6}));
this->param_.Out()->Resize(framework::make_ddim({input_bboxes_dims[1], 6}));
}
template class MultiClassNMSOp<CPU, float>;
} // namespace operators
......
......@@ -28,26 +28,25 @@ namespace operators {
using paddle_mobile::framework::Tensor;
template <typename DeviceType, typename T>
class MultiClassNMSOp : public framework::OperatorWithKernel<DeviceType> {
class MultiClassNMSOp : public framework::OperatorWithKernel<
DeviceType, MultiClassNMSParam,
operators::MultiClassNMSKernel<DeviceType, T>> {
public:
MultiClassNMSOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs,
const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<DeviceType>(type, inputs, outputs, attrs,
scope),
param_(inputs, outputs, attrs, *scope) {}
void RunImpl() const {
operators::MultiClassNMSKernel<DeviceType, T> kernel;
kernel.Compute(param_);
}
using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel;
: framework::OperatorWithKernel<
DeviceType, MultiClassNMSParam,
operators::MultiClassNMSKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, MultiClassNMSParam,
operators::MultiClassNMSKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override;
protected:
MultiClassNMSParam param_;
};
} // namespace operators
......
......@@ -34,13 +34,13 @@ int PoolOutputSize(int input_size, int filter_size, int padding, int stride,
}
template <typename DeviceType, typename T>
void PoolOp<DeviceType, T>::InferShape() const {
auto in_x_dims = param_.Input()->dims();
std::vector<int> ksize = param_.Ksize();
std::vector<int> paddings = param_.Paddings();
std::vector<int> strides = param_.Strides();
bool ceil_mode = param_.isCeilMode();
auto in_x_dims = this->param_.Input()->dims();
std::vector<int> ksize = this->param_.Ksize();
std::vector<int> paddings = this->param_.Paddings();
std::vector<int> strides = this->param_.Strides();
bool ceil_mode = this->param_.isCeilMode();
if (param_.isGlobalPooling()) {
if (this->param_.isGlobalPooling()) {
ksize.resize(static_cast<size_t>(in_x_dims.size()) - 2);
for (size_t i = 0; i < ksize.size(); ++i) {
paddings[i] = 0;
......@@ -52,7 +52,7 @@ void PoolOp<DeviceType, T>::InferShape() const {
output_shape.push_back(PoolOutputSize(in_x_dims[i + 2], ksize[i],
paddings[i], strides[i], ceil_mode));
}
param_.Output()->Resize(framework::make_ddim(output_shape));
this->param_.Output()->Resize(framework::make_ddim(output_shape));
}
template class PoolOp<CPU, float>;
} // namespace operators
......
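PoolOutputSize (defined at the top of this file's diff) follows the usual pooling arithmetic, including the ceil_mode variant; sketched here for reference:

inline int PoolOutputSizeSketch(int input_size, int filter_size, int padding,
                                int stride, bool ceil_mode) {
  int numerator = input_size - filter_size + 2 * padding;
  if (ceil_mode) numerator += stride - 1;  // round up instead of down
  return numerator / stride + 1;
}
// Example: input 112, filter 3, padding 1, stride 2, ceil_mode off -> 56.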
......@@ -29,24 +29,21 @@ using framework::OperatorWithKernel;
using framework::Scope;
using std::string;
template <typename DeviceType, typename T>
class PoolOp : public OperatorWithKernel<DeviceType> {
class PoolOp : public OperatorWithKernel<DeviceType, PoolParam,
operators::PoolKernel<DeviceType, T>> {
public:
PoolOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const AttributeMap &attrs,
std::shared_ptr<Scope> scope)
: OperatorWithKernel<DeviceType>(type, inputs, outputs, attrs, scope),
param_(inputs, outputs, attrs, *scope) {}
using OperatorWithKernel<DeviceType>::OperatorWithKernel;
: OperatorWithKernel<DeviceType, PoolParam,
operators::PoolKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {}
using OperatorWithKernel<
DeviceType, PoolParam,
operators::PoolKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override;
void RunImpl() const {
operators::PoolKernel<DeviceType, T> kernel;
kernel.Compute(param_);
this->ClearVariables({"X"});
}
private:
PoolParam param_;
};
} // namespace operators
} // namespace paddle_mobile
......
......@@ -21,13 +21,13 @@ namespace operators {
template <typename Dtype, typename T>
void PriorBoxOp<Dtype, T>::InferShape() const {
auto input_dims = param_.Input()->dims();
auto input_image_dims = param_.InputImage()->dims();
auto min_sizes = param_.MinSizes();
auto max_sizes = param_.MaxSizes();
auto variances = param_.Variances();
auto aspect_ratios = param_.AspectRatios();
bool flip = param_.Flip();
auto input_dims = this->param_.Input()->dims();
auto input_image_dims = this->param_.InputImage()->dims();
auto min_sizes = this->param_.MinSizes();
auto max_sizes = this->param_.MaxSizes();
auto variances = this->param_.Variances();
auto aspect_ratios = this->param_.AspectRatios();
bool flip = this->param_.Flip();
std::vector<float> aspect_ratios_vec;
ExpandAspectRatios(aspect_ratios, flip, &aspect_ratios_vec);
......@@ -41,8 +41,8 @@ void PriorBoxOp<Dtype, T>::InferShape() const {
dim_vec[1] = input_dims[3];
dim_vec[2] = num_priors;
dim_vec[3] = 4;
param_.OutputBoxes()->Resize(framework::make_ddim(dim_vec));
param_.OutputVariances()->Resize(framework::make_ddim(dim_vec));
this->param_.OutputBoxes()->Resize(framework::make_ddim(dim_vec));
this->param_.OutputVariances()->Resize(framework::make_ddim(dim_vec));
}
template class PriorBoxOp<CPU, float>;
} // namespace operators
......
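num_priors above follows the usual SSD prior-box rule: one box per expanded aspect ratio for every min size, plus one extra box per max size, laid out as [H, W, num_priors, 4]. A sketch (hypothetical helper; the real count is computed inline in InferShape):

inline size_t NumPriorsSketch(size_t num_expanded_ratios, size_t num_min,
                              size_t num_max) {
  return num_expanded_ratios * num_min + num_max;
}
// Example: 3 expanded ratios, 1 min size, 1 max size -> 4 priors per cell.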
......@@ -28,26 +28,24 @@ namespace operators {
using paddle_mobile::framework::Tensor;
template <typename DeviceType, typename T>
class PriorBoxOp : public framework::OperatorWithKernel<DeviceType> {
class PriorBoxOp
: public framework::OperatorWithKernel<
DeviceType, PriorBoxParam, operators::PriorBoxKernel<DeviceType, T>> {
public:
PriorBoxOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs,
const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<DeviceType>(type, inputs, outputs, attrs,
scope),
param_(inputs, outputs, attrs, *scope) {}
: framework::OperatorWithKernel<DeviceType, PriorBoxParam,
operators::PriorBoxKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {}
void RunImpl() const {
operators::PriorBoxKernel<DeviceType, T> kernel;
kernel.Compute(param_);
}
using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel;
using framework::OperatorWithKernel<
DeviceType, PriorBoxParam,
operators::PriorBoxKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override;
protected:
PriorBoxParam param_;
};
} // namespace operators
......
......@@ -20,8 +20,8 @@ namespace operators {
template <typename Dtype, typename T>
void ReluOp<Dtype, T>::InferShape() const {
auto input_dims = param_.InputX()->dims();
param_.Out()->Resize(input_dims);
auto input_dims = this->param_.InputX()->dims();
this->param_.Out()->Resize(input_dims);
}
template class ReluOp<CPU, float>;
} // namespace operators
......
......@@ -28,7 +28,9 @@ namespace operators {
using paddle_mobile::framework::Tensor;
template <typename DeviceType, typename T>
class ReluOp : public framework::OperatorWithKernel<DeviceType> {
class ReluOp
: public framework::OperatorWithKernel<
DeviceType, ReluParam, operators::ReluKernel<DeviceType, T>> {
public:
/*
* @b The op's constructor: it must call the parent constructor and construct the op's own parameter struct
......@@ -36,27 +38,16 @@ class ReluOp : public framework::OperatorWithKernel<DeviceType> {
ReluOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<DeviceType>(type, inputs, outputs, attrs,
scope),
param_(inputs, outputs, attrs, *scope) {}
: framework::OperatorWithKernel<DeviceType, ReluParam,
operators::ReluKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {}
/*
* @b Runs the op by invoking the corresponding kernel
* */
void RunImpl() const {
operators::ReluKernel<DeviceType, T> kernel;
kernel.Compute(param_);
}
using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel;
using framework::OperatorWithKernel<
DeviceType, ReluParam,
operators::ReluKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override;
protected:
/*
* @b The parameter struct the Relu kernel needs at compute time;
* it is defined in: paddle-mobile/src/operators/op_param.h
* */
ReluParam param_;
};
} // namespace operators
......
......@@ -22,10 +22,10 @@ namespace operators {
template <typename Dtype, typename T>
void ReshapeOp<Dtype, T>::InferShape() const {
/// todo: add InputShape() detection.
auto &shape = param_.Shape();
auto input_x_dims = param_.InputX()->dims();
auto &shape = this->param_.Shape();
auto input_x_dims = this->param_.InputX()->dims();
auto out_dims = ValidateShape(shape, input_x_dims);
param_.Out()->Resize(out_dims);
this->param_.Out()->Resize(out_dims);
}
template class ReshapeOp<CPU, float>;
} // namespace operators
......
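ValidateShape implements the usual reshape conventions: a shape entry of -1 is inferred from the remaining element count, and 0 copies the corresponding input dimension. A minimal sketch assuming at most one -1 and a divisible element count:

#include <cstdint>
#include <vector>

std::vector<int64_t> ValidateShapeSketch(const std::vector<int64_t> &shape,
                                         const std::vector<int64_t> &in_dims) {
  int64_t in_count = 1, known = 1;
  for (int64_t d : in_dims) in_count *= d;
  std::vector<int64_t> out(shape.size());
  int infer_at = -1;
  for (size_t i = 0; i < shape.size(); ++i) {
    if (shape[i] == -1) {
      infer_at = static_cast<int>(i);  // fill in below
    } else {
      out[i] = (shape[i] == 0) ? in_dims[i] : shape[i];
      known *= out[i];
    }
  }
  if (infer_at >= 0) out[infer_at] = in_count / known;
  return out;
}
// Example: in_dims {2, 3, 4}, shape {0, -1} -> {2, 12}.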
......@@ -28,26 +28,24 @@ namespace operators {
using paddle_mobile::framework::Tensor;
template <typename DeviceType, typename T>
class ReshapeOp : public framework::OperatorWithKernel<DeviceType> {
class ReshapeOp
: public framework::OperatorWithKernel<
DeviceType, ReshapeParam, operators::ReshapeKernel<DeviceType, T>> {
public:
ReshapeOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs,
const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<DeviceType>(type, inputs, outputs, attrs,
scope),
param_(inputs, outputs, attrs, *scope) {}
: framework::OperatorWithKernel<DeviceType, ReshapeParam,
operators::ReshapeKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {}
void RunImpl() const {
operators::ReshapeKernel<DeviceType, T> kernel;
kernel.Compute(param_);
}
using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel;
using framework::OperatorWithKernel<
DeviceType, ReshapeParam,
operators::ReshapeKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override;
protected:
ReshapeParam param_;
};
} // namespace operators
......
......@@ -20,7 +20,7 @@ namespace paddle_mobile {
namespace operators {
template <typename DeviceType, typename T>
void SigmoidOp<DeviceType, T>::InferShape() const {
param_.Out()->Resize(param_.InputX()->dims());
this->param_.Out()->Resize(this->param_.InputX()->dims());
}
template class SigmoidOp<CPU, float>;
} // namespace operators
......
......@@ -25,28 +25,23 @@ limitations under the License. */
namespace paddle_mobile {
namespace operators {
template <typename DeviceType, typename T>
class SigmoidOp : public framework::OperatorWithKernel<DeviceType> {
class SigmoidOp
: public framework::OperatorWithKernel<
DeviceType, SigmoidParam, operators::SigmoidKernel<DeviceType, T>> {
public:
SigmoidOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs,
const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<DeviceType>(type, inputs, outputs, attrs,
scope),
param_(inputs, outputs, attrs, *scope) {}
: framework::OperatorWithKernel<DeviceType, SigmoidParam,
operators::SigmoidKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {}
using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel;
using framework::OperatorWithKernel<
DeviceType, SigmoidParam,
operators::SigmoidKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override;
void RunImpl() const {
operators::SigmoidKernel<DeviceType, T> kernel;
kernel.Compute(param_);
this->ClearVariables({"X"});
}
private:
SigmoidParam param_;
};
} // namespace operators
} // namespace paddle_mobile
......
......@@ -20,7 +20,7 @@ namespace paddle_mobile {
namespace operators {
template <typename DeviceType, typename T>
void SoftmaxOp<DeviceType, T>::InferShape() const {
param_.Out()->Resize(param_.InputX()->dims());
this->param_.Out()->Resize(this->param_.InputX()->dims());
}
template class SoftmaxOp<CPU, float>;
} // namespace operators
......
......@@ -25,28 +25,25 @@ limitations under the License. */
namespace paddle_mobile {
namespace operators {
template <typename DeviceType, typename T>
class SoftmaxOp : public framework::OperatorWithKernel<DeviceType> {
class SoftmaxOp
: public framework::OperatorWithKernel<
DeviceType, SoftmaxParam, operators::SoftmaxKernel<DeviceType, T>> {
public:
SoftmaxOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs,
const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<DeviceType>(type, inputs, outputs, attrs,
scope),
param_(inputs, outputs, attrs, *scope) {}
: framework::OperatorWithKernel<DeviceType, SoftmaxParam,
operators::SoftmaxKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {}
using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel;
using framework::OperatorWithKernel<
DeviceType, SoftmaxParam,
operators::SoftmaxKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override;
void RunImpl() const {
operators::SoftmaxKernel<DeviceType, T> kernel;
kernel.Compute(param_);
this->ClearVariables({"X"});
}
private:
SoftmaxParam param_;
};
} // namespace operators
} // namespace paddle_mobile
......
......@@ -23,8 +23,8 @@ namespace operators {
template <typename Dtype, typename T>
void TransposeOp<Dtype, T>::InferShape() const {
auto input_x_dims = param_.InputX()->dims();
auto axis = param_.Axis();
auto input_x_dims = this->param_.InputX()->dims();
auto axis = this->param_.Axis();
size_t x_dims_size = input_x_dims.size();
size_t axis_size = axis.size();
......@@ -45,7 +45,7 @@ void TransposeOp<Dtype, T>::InferShape() const {
for (size_t i = 0; i < axis_size; i++) {
out_dims[i] = input_x_dims[axis[i]];
}
param_.Out()->Resize(out_dims);
this->param_.Out()->Resize(out_dims);
}
template class TransposeOp<CPU, float>;
} // namespace operators
......
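The permutation checked above reduces to out_dims[i] = in_dims[axis[i]]; a sketch with a worked example:

#include <cstdint>
#include <vector>

std::vector<int64_t> TransposeShapeSketch(const std::vector<int64_t> &in_dims,
                                          const std::vector<int> &axis) {
  std::vector<int64_t> out(axis.size());
  for (size_t i = 0; i < axis.size(); ++i) out[i] = in_dims[axis[i]];
  return out;
}
// Example: in_dims {1, 3, 224, 224}, axis {0, 2, 3, 1} -> {1, 224, 224, 3}
// (NCHW -> NHWC).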
......@@ -28,26 +28,23 @@ namespace operators {
using paddle_mobile::framework::Tensor;
template <typename DeviceType, typename T>
class TransposeOp : public framework::OperatorWithKernel<DeviceType> {
class TransposeOp : public framework::OperatorWithKernel<
DeviceType, TransposeParam,
operators::TransposeKernel<DeviceType, T>> {
public:
TransposeOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs,
const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<DeviceType>(type, inputs, outputs, attrs,
scope),
param_(inputs, outputs, attrs, *scope) {}
void RunImpl() const {
operators::TransposeKernel<DeviceType, T> kernel;
kernel.Compute(param_);
}
using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel;
: framework::OperatorWithKernel<
DeviceType, TransposeParam,
operators::TransposeKernel<DeviceType, T>>(type, inputs, outputs,
attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, TransposeParam,
operators::TransposeKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override;
protected:
TransposeParam param_;
};
} // namespace operators
......
......@@ -137,6 +137,10 @@ else ()
ADD_EXECUTABLE(test-depthwise-conv-op operators/test_depthwise_conv_op.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-depthwise-conv-op paddle-mobile)
# gen test
ADD_EXECUTABLE(test-mobilenet net/test_mobilenet.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-mobilenet paddle-mobile)
#add_library(test-lib-size SHARED common/test_lib_size.h common/test_lib_size.cpp)
endif()
......@@ -19,14 +19,14 @@ limitations under the License. */
int main() {
paddle_mobile::Loader<paddle_mobile::CPU> loader;
auto time1 = time();
auto program = loader.Load(g_mobilenet, false);
auto program = loader.Load(g_mobilenet, true);
auto time2 = time();
DLOG << "load cost :" << time_diff(time1, time1) << "ms";
paddle_mobile::Executor<paddle_mobile::CPU> executor(program, 2, false);
paddle_mobile::Executor<paddle_mobile::CPU> executor(program, 1, true);
std::vector<int64_t> dims{2, 3, 224, 224};
std::vector<int64_t> dims{1, 3, 224, 224};
Tensor input_tensor;
SetupTensor<float>(&input_tensor, {2, 3, 224, 224}, static_cast<float>(0),
SetupTensor<float>(&input_tensor, {1, 3, 224, 224}, static_cast<float>(0),
static_cast<float>(1));
std::vector<float> input(input_tensor.data<float>(),
......