Remove int8 conv/fc fusion ops since they lead to float model prediction failures

889c8ebc · hjchen2 · 368073f4 · 889c8ebc · 889c8ebc · 889c8ebc
16 changed files
--- a/src/common/types.cpp
+++ b/src/common/types.cpp
@@ -24,7 +24,6 @@ const char *G_OP_TYPE_CONCAT = "concat";
 const char *G_OP_TYPE_ELEMENTWISE_ADD = "elementwise_add";
 const char *G_OP_TYPE_FILL_CONSTANT = "fill_constant";
 const char *G_OP_TYPE_FUSION_CONV_ADD_RELU = "fusion_conv_add_relu";
-const char *G_OP_TYPE_FUSION_CONV_ADD_RELU_INT8 = "fusion_conv_add_relu_int8";
 const char *G_OP_TYPE_FUSION_CONV_ADD_PRELU = "fusion_conv_add_prelu";
 const char *G_OP_TYPE_FUSION_CONV_ADD_ADD_PRELU = "fusion_conv_add_add_prelu";
 const char *G_OP_TYPE_FUSION_CONV_ADD_BN_RELU = "fusion_conv_add_bn_relu";
@@ -32,7 +31,6 @@ const char *G_OP_TYPE_FUSION_CONV_BN_ADD_RELU = "fusion_conv_bn_add_relu";
 const char *G_OP_TYPE_FUSION_DWCONV_BN_RELU = "fusion_dwconv_bn_relu";
 const char *G_OP_TYPE_FUSION_CONV_BN_RELU = "fusion_conv_bn_relu";
 const char *G_OP_TYPE_FC = "fusion_fc";
-const char *G_OP_TYPE_FC_INT8 = "fusion_fc_int8";
 const char *G_OP_TYPE_FUSION_CONV_ADD = "fusion_conv_add";
 const char *G_OP_TYPE_LRN = "lrn";
 const char *G_OP_TYPE_MUL = "mul";
@@ -119,13 +117,11 @@ std::unordered_map<
        {G_OP_TYPE_MULTICLASS_NMS, {{"BBoxes", "Scores"}, {"Out"}}},
        {G_OP_TYPE_POLYGON_BOX_TRANSFORM, {{"Input"}, {"Output"}}},
        {G_OP_TYPE_FC, {{"X", "Y", "Z"}, {"Out"}}},
-        {G_OP_TYPE_FC_INT8, {{"X", "Y", "Z", "Scale"}, {"Out"}}},
        {G_OP_TYPE_RESHAPE, {{"X"}, {"Out"}}},
        {G_OP_TYPE_RESHAPE2, {{"X"}, {"Out", "XShape"}}},
        {G_OP_TYPE_DEPTHWISE_CONV, {{"Input"}, {"Output"}}},
        {G_OP_TYPE_FILL_CONSTANT, {{}, {"Out"}}},
        {G_OP_TYPE_FUSION_CONV_ADD_RELU, {{"Input"}, {"Out"}}},
-        {G_OP_TYPE_FUSION_CONV_ADD_RELU_INT8, {{"Input", "Scale"}, {"Out"}}},
        {G_OP_TYPE_FUSION_CONV_ADD_PRELU, {{"Input"}, {"Out"}}},
        {G_OP_TYPE_FUSION_CONV_ADD_ADD_PRELU, {{"Input"}, {"Out"}}},
        {G_OP_TYPE_IM2SEQUENCE, {{"X"}, {"Out"}}},

--- a/src/common/types.h
+++ b/src/common/types.h
@@ -108,11 +108,9 @@ extern const char *G_OP_TYPE_BOX_CODER;
 extern const char *G_OP_TYPE_CONCAT;
 extern const char *G_OP_TYPE_ELEMENTWISE_ADD;
 extern const char *G_OP_TYPE_FUSION_CONV_ADD_RELU;
-extern const char *G_OP_TYPE_FUSION_CONV_ADD_RELU_INT8;
 extern const char *G_OP_TYPE_FUSION_CONV_ADD_PRELU;
 extern const char *G_OP_TYPE_FUSION_CONV_ADD_ADD_PRELU;
 extern const char *G_OP_TYPE_FC;
-extern const char *G_OP_TYPE_FC_INT8;
 extern const char *G_OP_TYPE_FUSION_CONV_ADD;
 extern const char *G_OP_TYPE_FUSION_CONV_ADD_BN_RELU;
 extern const char *G_OP_TYPE_FUSION_CONV_BN_ADD_RELU;

--- a/src/framework/op_registry.h
+++ b/src/framework/op_registry.h
@@ -98,24 +98,6 @@ class OpRegistry {
  }
 };

-#define REGISTER_OPERATOR_INT8(op_type, op_class, device_name, device_type) \
-  template class op_class<device_type, int8_t>;                             \
-  template <typename Dtype, typename T>                                     \
-  class _OpClass_##op_type##_##device_name : public op_class<Dtype, T> {    \
-   public:                                                                  \
-    DEFINE_OP_CONSTRUCTOR(_OpClass_##op_type##_##device_name, op_class);    \
-  };                                                                        \
-  static paddle_mobile::framework::OperatorRegistrar<                       \
-      device_type, _OpClass_##op_type##_##device_name<device_type, int8_t>> \
-      __op_registrar_##op_type##_##device_name(#op_type);                   \
-  int TouchOpRegistrar_##op_type##_##device_name() {                        \
-    __op_registrar_##op_type##_##device_name.Touch();                       \
-    return 0;                                                               \
-  }
-
-#define REGISTER_OPERATOR_CPU_INT8(op_type, op_class) \
-  REGISTER_OPERATOR_INT8(op_type, op_class, cpu, paddle_mobile::CPU);
-
 #define REGISTER_OPERATOR(op_type, op_class, device_name, device_type)     \
  template class op_class<device_type, float>;                             \
  template <typename Dtype, typename T>                                    \

--- a/src/operators/fusion_conv_add_relu_int8_op.cpp
+++ b/src/operators/fusion_conv_add_relu_int8_op.cpp
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#ifdef FUSION_CONVADDRELU_INT8_OP
-
-#include "operators/fusion_conv_add_relu_int8_op.h"
-#include <vector>
-#include "operators/math/conv_func.h"
-
-namespace paddle_mobile {
-namespace operators {
-
-template <typename Dtype, typename T>
-void FusionConvAddReluInt8Op<Dtype, T>::InferShape() const {
-  auto in_dims = this->param_.Input()->dims();
-  auto filter_dims = this->param_.Filter()->dims();
-  const std::vector<int> &strides = this->param_.Strides();
-  std::vector<int> paddings = this->param_.Paddings();
-  int groups = this->param_.Groups();
-  std::vector<int> dilations = this->param_.Dilations();
-
-  PADDLE_MOBILE_ENFORCE((in_dims.size() == filter_dims.size() &&
-                         dilations.size() == paddings.size() &&
-                         paddings.size() == strides.size()),
-                        "ConvParam is not suitable");
-
-  std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
-  for (size_t i = 0; i < strides.size(); ++i) {
-    output_shape.push_back(
-        math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i],
-                             paddings[i], strides[i]));
-  }
-  framework::DDim ddim = framework::make_ddim(output_shape);
-  this->param_.Output()->Resize(ddim);
-}
-
-}  // namespace operators
-}  // namespace paddle_mobile
-
-namespace ops = paddle_mobile::operators;
-#ifdef PADDLE_MOBILE_CPU
-REGISTER_OPERATOR_CPU_INT8(fusion_conv_add_relu_int8,
-                           ops::FusionConvAddReluInt8Op);
-#endif
-#endif  // FUSION_CONVADDRELU_INT8_OP
--- a/src/operators/fusion_conv_add_relu_int8_op.h
+++ b/src/operators/fusion_conv_add_relu_int8_op.h
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#ifdef FUSION_CONVADDRELU_INT8_OP
-#pragma once
-#include <string>
-#include "framework/operator.h"
-#include "operators/kernel/conv_add_relu_kernel.h"
-#include "operators/op_param.h"
-namespace paddle_mobile {
-namespace operators {
-template <typename DeviceType, typename T>
-class FusionConvAddReluInt8Op
-    : public framework::OperatorWithKernel<DeviceType,
-                                           FusionConvAddReluParam<DeviceType>,
-                                           ConvAddReluKernel<DeviceType, T>> {
- public:
-  FusionConvAddReluInt8Op(const std::string &type,
-                          const VariableNameMap &inputs,
-                          const VariableNameMap &outputs,
-                          const framework::AttributeMap &attrs,
-                          std::shared_ptr<framework::Scope> scope)
-      : framework::OperatorWithKernel<DeviceType,
-                                      FusionConvAddReluParam<DeviceType>,
-                                      ConvAddReluKernel<DeviceType, T>>(
-            type, inputs, outputs, attrs, scope) {}
-  void InferShape() const override;
-};
-}  // namespace operators
-}  // namespace paddle_mobile
-#endif  // FUSION_CONVADDRELU_INT8_OP
--- a/src/operators/fusion_fc_int8_op.cpp
+++ b/src/operators/fusion_fc_int8_op.cpp
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#ifdef FUSION_FC_INT8_OP
-
-#include "operators/fusion_fc_int8_op.h"
-
-namespace paddle_mobile {
-namespace operators {
-
-template <typename Dtype, typename T>
-void FusionFcInt8Op<Dtype, T>::InferShape() const {
-  auto x_dims = this->param_.InputX()->dims();
-  auto y_dims = this->param_.InputY()->dims();
-  int x_num_col_dims = this->param_.XNumColDims();
-  int y_num_col_dims = this->param_.YNumColDims();
-
-  assert(x_dims.size() > x_num_col_dims);
-  assert(y_dims.size() > y_num_col_dims);
-
-  /// (1,2,3,4) , x_num_col_dims = 2  -> (2,12)
-  auto x_mat_dims = framework::flatten_to_2d(x_dims, x_num_col_dims);
-  auto y_mat_dims = framework::flatten_to_2d(y_dims, y_num_col_dims);
-
-  assert(x_mat_dims[1] == y_mat_dims[0]);
-
-  std::vector<int64_t> output_dims;
-  output_dims.reserve(
-      static_cast<size_t>(x_num_col_dims + y_dims.size() - y_num_col_dims));
-
-  for (int i = 0; i < x_num_col_dims; ++i) {
-    output_dims.push_back(x_dims[i]);
-  }
-
-  for (int i = y_num_col_dims; i < y_dims.size(); ++i) {
-    output_dims.push_back(y_dims[i]);
-  }
-
-  framework::DDim ddim = framework::make_ddim(output_dims);
-  this->param_.Out()->Resize(ddim);
-}
-
-}  // namespace operators
-}  // namespace paddle_mobile
-
-namespace ops = paddle_mobile::operators;
-#ifdef PADDLE_MOBILE_CPU
-REGISTER_OPERATOR_CPU_INT8(fusion_fc_int8, ops::FusionFcInt8Op);
-#endif
-#endif  // FUSION_FC_INT8_OP
--- a/src/operators/fusion_fc_int8_op.h
+++ b/src/operators/fusion_fc_int8_op.h
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#ifdef FUSION_FC_INT8_OP
-
-#pragma once
-
-#include <string>
-#include <vector>
-
-#include "framework/operator.h"
-#include "framework/program/program-optimize/fusion_op_register.h"
-#include "operators/kernel/fusion_fc_kernel.h"
-#include "operators/op_param.h"
-
-namespace paddle_mobile {
-namespace operators {
-
-template <typename DeviceType, typename T>
-class FusionFcInt8Op
-    : public framework::OperatorWithKernel<DeviceType,
-                                           FusionFcParam<DeviceType>,
-                                           FusionFcKernel<DeviceType, T>> {
- public:
-  FusionFcInt8Op(const std::string &type, const VariableNameMap &inputs,
-                 const VariableNameMap &outputs,
-                 const framework::AttributeMap &attrs,
-                 std::shared_ptr<framework::Scope> scope)
-      : framework::OperatorWithKernel<DeviceType, FusionFcParam<DeviceType>,
-                                      FusionFcKernel<DeviceType, T>>(
-            type, inputs, outputs, attrs, scope) {}
-
-  void InferShape() const override;
-};
-
-}  // namespace operators
-}  // namespace paddle_mobile
-
-#endif  // FUSION_FC_INT8_OP
--- a/src/operators/kernel/arm/conv_add_relu_kernel.cpp
+++ b/src/operators/kernel/arm/conv_add_relu_kernel.cpp
@@ -32,20 +32,6 @@ void ConvAddReluKernel<CPU, float>::Compute(
 }
 template class ConvAddReluKernel<CPU, float>;

-#ifdef FUSION_CONVADDRELU_INT8_OP
-template <>
-bool ConvAddReluKernel<CPU, int8_t>::Init(FusionConvAddReluParam<CPU> *param) {
-  return true;
-}
-
-template <>
-void ConvAddReluKernel<CPU, int8_t>::Compute(
-    const FusionConvAddReluParam<CPU> &param) {
-  ConvAddReluCompute<int8_t, int32_t>(param);
-}
-template class ConvAddReluKernel<CPU, int8_t>;
-#endif
-
 }  // namespace operators
 }  // namespace paddle_mobile


--- a/src/operators/kernel/central-arm-func/conv_add_relu_arm_func.h
+++ b/src/operators/kernel/central-arm-func/conv_add_relu_arm_func.h
@@ -37,12 +37,6 @@ void ConvAddReluCompute(const FusionConvAddReluParam<CPU> &param) {

  float alpha = 1.0f;
  float beta = 1.0f;
-
-#ifdef FUSION_CONVADDRELU_INT8_OP
-  alpha = param.InputScale()->data<float>()[0];
-  beta = 0.0f;
-#endif
-
  int32_t groups = param.Groups();
  std::vector<int32_t> strides = param.Strides();
  std::vector<int32_t> paddings = param.Paddings();

--- a/src/operators/kernel/central-arm-func/fusion_fc_arm_func.h
+++ b/src/operators/kernel/central-arm-func/fusion_fc_arm_func.h
@@ -37,7 +37,6 @@ void FusionFcCompute(const FusionFcParam<CPU> &param) {

  float alpha = 1.0f;
  float beta = 1.0f;
-
  const Tensor x_matrix =
      input_x->dims().size() > 2
          ? framework::ReshapeToMatrix(*input_x, param.XNumColDims())
@@ -57,28 +56,14 @@ void FusionFcCompute(const FusionFcParam<CPU> &param) {
  axis = (axis == -1 ? out_dim.size() - input_z->dims().size() : axis);
  PADDLE_MOBILE_ENFORCE(axis == 1, " to fit broadcast, axis = 1. ");

-  if (std::is_same<P, int8_t>::value) {
-#ifdef FUSION_FC_INT8_OP
-    alpha = param.InputScale()->data<float>()[0];
-    beta = 0.0f;
-    math::matmul(x_matrix, false, y_matrix, false, alpha, out, beta, false,
-                 input_z_data, true);
-#endif
-  } else {
  // bias_data的维度和out的第二个维度一致
  int64_t classes = input_z->numel();
  for (int i = 0; i < out_dim[0]; i++) {
-      memory::Copy(out_data + i * classes, input_z_data,
-                   sizeof(float) * classes);
+    memory::Copy(out_data + i * classes, input_z_data, sizeof(float) * classes);
  }

  math::matmul<float>(x_matrix, false, y_matrix, false, alpha, out, beta,
                      false);
-  }
-  PADDLE_MOBILE_ENFORCE(out_dim.size() == 2, " out_dim.size must be 2.");
-  //  if (out_dim.size() != 2) {
-  //      out->Resize(out_dim);
-  //  }
 }

 }  // namespace operators

--- a/src/operators/math/pooling.cpp
+++ b/src/operators/math/pooling.cpp
@@ -14,7 +14,9 @@ limitations under the License. */

 #ifdef POOL_OP

-#include "pooling.h"
+#include "operators/math/pooling.h"
+#include <algorithm>
+#include <vector>
 #include "common/types.h"
 #ifdef _OPENMP
 #include <omp.h>
@@ -60,7 +62,7 @@ class PoolFunctor<CPU, PoolProcess, T> {
    T *output_data = output->mutable_data<T>();
    for (int i = 0; i < batch_size; i++) {
      for (int c = 0; c < output_channels; ++c) {
-#pragma omp parallel for
+        #pragma omp parallel for
        for (int ph = 0; ph < output_height; ++ph) {
          int hstart = ph * stride_height - padding_height;
          int hend = std::min(hstart + ksize_height, input_height);

--- a/src/operators/op_param.h
+++ b/src/operators/op_param.h
@@ -1632,10 +1632,6 @@ class FusionFcParam : public OpParam {
    x_num_col_dims_ = GetAttr<int>("x_num_col_dims", attrs);
    y_num_col_dims_ = GetAttr<int>("y_num_col_dims", attrs);
    axis_ = GetAttr<int>("axis", attrs);
-
-#ifdef FUSION_FC_INT8_OP
-    scale_ = InputScaleFrom<GType>(inputs, scope);
-#endif
  }
  GType *InputX() const { return input_x_; }

@@ -1660,16 +1656,8 @@ class FusionFcParam : public OpParam {
  int y_num_col_dims_;
  int axis_;

-#ifdef FUSION_FC_INT8_OP
- public:
-  const RType *InputScale() const { return scale_; }
-
- private:
-  RType *scale_;
-#endif
-
 #ifdef PADDLE_MOBILE_FPGA
- private:
+ private:  // NOLINT
  fpga::SplitConvArgs fpga_conv_args;

 public:
@@ -1719,19 +1707,7 @@ class FusionConvAddReluParam : public FusionConvAddParam<DeviceType> {
  FusionConvAddReluParam(const VariableNameMap &inputs,
                         const VariableNameMap &outputs,
                         const AttributeMap &attrs, const Scope &scope)
-      : FusionConvAddParam<DeviceType>(inputs, outputs, attrs, scope) {
-#ifdef FUSION_CONVADDRELU_INT8_OP
-    scale_ = OpParam::InputScaleFrom<GType>(inputs, scope);
-#endif
-  }
-#ifdef FUSION_CONVADDRELU_INT8_OP
-  typedef typename DtypeTensorTrait<DeviceType>::gtype GType;
-  typedef typename DtypeTensorTrait<DeviceType>::rtype RType;
-  const RType *InputScale() const { return scale_; }
-
- private:
-  RType *scale_;
-#endif
+      : FusionConvAddParam<DeviceType>(inputs, outputs, attrs, scope) {}
 };
 #endif


--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -324,10 +324,6 @@ if (NOT FOUND_MATCH)
    ADD_EXECUTABLE(test-conv-add-relu-op operators/test_conv_add_relu_op.cpp test_helper.h test_include.h executor_for_test.h)
    target_link_libraries(test-conv-add-relu-op paddle-mobile)

-    # gen test
-    ADD_EXECUTABLE(test-conv-add-relu-int8-op operators/test_fusion_conv_add_relu_int8_op.cpp test_helper.h test_include.h)
-    target_link_libraries(test-conv-add-relu-int8-op paddle-mobile)
-
    # gen test
    ADD_EXECUTABLE(test-conv-add-bn-relu-op operators/test_fusion_conv_add_bn_relu_op.cpp test_helper.h test_include.h executor_for_test.h)
    target_link_libraries(test-conv-add-bn-relu-op paddle-mobile)

--- a/test/net/test_benchmark.cpp
+++ b/test/net/test_benchmark.cpp
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include <iostream>
+#include <sstream>
 #include "../test_helper.h"
 #include "../test_include.h"


--- a/test/operators/test_fusion_conv_add_relu_int8_op.cpp
+++ b/test/operators/test_fusion_conv_add_relu_int8_op.cpp
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include <iostream>
-
-#ifdef FUSION_CONVADDRELU_INT8_OP
-#include <limits>
-#include "../test_helper.h"
-#include "../test_include.h"
-#include "operators/fusion_conv_add_relu_int8_op.h"
-
-namespace paddle_mobile {
-int32_t qadd_int32(int32_t l, int32_t r) {
-  int64_t res = static_cast<int64_t>(l) + static_cast<int64_t>(r);
-  if (res > std::numeric_limits<int32_t>::max())
-    return std::numeric_limits<int32_t>::max();
-  else if (res < std::numeric_limits<int32_t>::min())
-    return std::numeric_limits<int32_t>::min();
-  else
-    return static_cast<int32_t>(res);
-}
-
-// round to zero
-float round2zero(float v) {
-  float res;
-  if (v > 0)
-    res = std::floor(v);
-  else if (v < 0)
-    res = std::ceil(v);
-  return res;
-}
-
-int8_t qscale_int32(int32_t v, float scale) {
-  float res = static_cast<float>(v) * scale;
-  res = round2zero(res);
-  if (res > 127)
-    return static_cast<int8_t>(127);
-  else if (res < -127)
-    return static_cast<int8_t>(-127);
-  else
-    return static_cast<int8_t>(res);
-}
-
-// Reference convolution from Caffe for checking results.
-// accumulate through explicit loops over input, output, and filters.
-template <typename T>
-void conv2d(const framework::Tensor *input, const framework::Tensor *filter,
-            const framework::Tensor *bias, const framework::AttributeMap &attrs,
-            framework::Tensor *output, float scale) {
-  framework::AttrReader attr_reader(attrs);
-  std::vector<int> paddings = attr_reader.Get<std::vector<int>>("paddings");
-  std::vector<int> strides = attr_reader.Get<std::vector<int>>("strides");
-  std::vector<int> dilations = attr_reader.Get<std::vector<int>>("dilations");
-  int groups = attr_reader.Get<int>("groups");
-  int kernel_h = filter->dims()[2];
-  int kernel_w = filter->dims()[3];
-  int pad_h = paddings[0];
-  int pad_w = paddings[1];
-  int stride_h = strides[0];
-  int stride_w = strides[1];
-  int dilation_h = dilations[0];
-  int dilation_w = dilations[1];
-  auto in_shape = input->dims();
-  auto out_shape = output->dims();
-
-  const bool has_depth = 0;
-  int kernel_d, pad_d, stride_d, dilation_d;
-  if (has_depth) {
-    kernel_d = kernel_h;
-    stride_d = stride_h;
-    pad_d = pad_h;
-    dilation_d = dilation_h;
-  } else {
-    kernel_d = stride_d = dilation_d = 1;
-    pad_d = 0;
-  }
-  // Groups
-  int o_g = out_shape[1] / groups;
-  int k_g = in_shape[1] / groups;
-  int o_head, k_head;
-  // Convolution
-  vector<int> weight_offset(4 + has_depth);
-  vector<int> in_offset(4 + has_depth);
-  vector<int> out_offset(4 + has_depth);
-  auto offset = [](const framework::Tensor *input, const vector<int> &indics) {
-    framework::DDim shape = input->dims();
-    size_t count = 0;
-    for (int i = 0; i < indics.size(); ++i) {
-      count *= shape[i];
-      count += indics[i];
-    }
-    return count;
-  };
-
-  const T *in_data = input->data<T>();
-  const T *w_data = filter->data<T>();
-  framework::Tensor output_32;
-  int32_t *out_data_32 = output_32.mutable_data<int32_t>(out_shape);
-  memset(out_data_32, 0, output_32.numel() * sizeof(int32_t));
-  for (int n = 0; n < out_shape[0]; n++) {
-    for (int g = 0; g < groups; g++) {
-      o_head = o_g * g;
-      k_head = k_g * g;
-      for (int o = 0; o < o_g; o++) {
-        for (int k = 0; k < k_g; k++) {
-          for (int z = 0; z < (has_depth ? out_shape[2] : 1); z++) {
-            for (int y = 0; y < out_shape[2 + has_depth]; y++) {
-              for (int x = 0; x < out_shape[3 + has_depth]; x++) {
-                for (int r = 0; r < kernel_d; r++) {
-                  for (int p = 0; p < kernel_h; p++) {
-                    for (int q = 0; q < kernel_w; q++) {
-                      int in_z = z * stride_d - pad_d + r * dilation_d;
-                      int in_y = y * stride_h - pad_h + p * dilation_h;
-                      int in_x = x * stride_w - pad_w + q * dilation_w;
-                      if (in_z >= 0 && in_z < (has_depth ? in_shape[2] : 1) &&
-                          in_y >= 0 && in_y < in_shape[2 + has_depth] &&
-                          in_x >= 0 && in_x < in_shape[3 + has_depth]) {
-                        weight_offset[0] = o + o_head;
-                        weight_offset[1] = k;
-                        if (has_depth) {
-                          weight_offset[2] = r;
-                        }
-                        weight_offset[2 + has_depth] = p;
-                        weight_offset[3 + has_depth] = q;
-                        in_offset[0] = n;
-                        in_offset[1] = k + k_head;
-                        if (has_depth) {
-                          in_offset[2] = in_z;
-                        }
-                        in_offset[2 + has_depth] = in_y;
-                        in_offset[3 + has_depth] = in_x;
-                        out_offset[0] = n;
-                        out_offset[1] = o + o_head;
-                        if (has_depth) {
-                          out_offset[2] = z;
-                        }
-                        out_offset[2 + has_depth] = y;
-                        out_offset[3 + has_depth] = x;
-
-                        out_data_32[offset(output, out_offset)] +=
-                            in_data[offset(input, in_offset)] *
-                            w_data[offset(filter, weight_offset)];
-                      }
-                    }
-                  }
-                }
-              }
-            }
-          }
-        }
-      }
-    }
-  }
-
-  T *out_data = output->mutable_data<T>();
-  int32_t n = out_shape[0];
-  int32_t c = out_shape[1];
-  int32_t h = out_shape[2];
-  int32_t w = out_shape[3];
-  const int32_t *bias_data = bias->data<int32_t>();
-  for (int i = 0; i < n; ++i) {
-    for (int j = 0; j < c; ++j) {
-      int32_t bias_v = bias_data[j];
-      for (int k = 0; k < h; ++k) {
-        for (int l = 0; l < w; ++l) {
-          int32_t tmp = out_data_32[i * c * h * w + j * h * w + k * w + l];
-          tmp = qadd_int32(tmp, bias_v);
-          tmp = std::max(0, tmp);
-          out_data[i * c * h * w + j * h * w + k * w + l] =
-              qscale_int32(tmp, scale);
-        }
-      }
-    }
-  }
-}
-
-template <typename T, int Kernel, int Pad, int Stride>
-int TestConvOp(int in_channels, int in_height, int in_width, int out_channels) {
-  int kernel_h = Kernel;
-  int kernel_w = Kernel;
-  int pad_h = Pad;
-  int pad_w = Pad;
-  int stride_h = Stride;
-  int stride_w = Stride;
-  int dilation_h = 1;
-  int dilation_w = 1;
-
-  int batch_size = 1;
-  int input_c = in_channels;
-  int input_h = in_height;
-  int input_w = in_width;
-  int output_c = out_channels;
-  framework::DDim input_shape =
-      framework::make_ddim({batch_size, input_c, input_h, input_w});
-  framework::DDim filter_shape =
-      framework::make_ddim({output_c, input_c, kernel_h, kernel_w});
-
-  int kernel_extent_h = dilation_h * (kernel_h - 1) + 1;
-  int kernel_extent_w = dilation_w * (kernel_w - 1) + 1;
-  int output_h = (input_h + 2 * pad_h - kernel_extent_h) / stride_h + 1;
-  int output_w = (input_w + 2 * pad_w - kernel_extent_w) / stride_w + 1;
-  framework::DDim output_shape = framework::make_ddim(
-      std::vector<int>({batch_size, output_c, output_h, output_w}));
-
-  framework::DDim bias_shape = framework::make_ddim({output_c});
-
-  VariableNameMap inputs;
-  VariableNameMap outputs;
-  auto scope = std::make_shared<framework::Scope>();
-  inputs["Input"] = std::vector<std::string>({"input"});
-  inputs["Filter"] = std::vector<std::string>({"filter"});
-  inputs["Scale"] = std::vector<std::string>({"scale"});
-  inputs["Y"] = std::vector<std::string>({"bias"});
-  outputs["Out"] = std::vector<std::string>({"output"});
-
-  auto input_var = scope.get()->Var("input");
-  auto input = input_var->template GetMutable<framework::LoDTensor>();
-  SetupTensor<T>(input, input_shape, -127, 127);
-
-  auto filter_var = scope.get()->Var("filter");
-  auto filter = filter_var->template GetMutable<framework::LoDTensor>();
-  SetupTensor<T>(filter, filter_shape, -127, 127);
-
-  auto scale_var = scope.get()->Var("scale");
-  auto scale = scale_var->template GetMutable<framework::LoDTensor>();
-  scale->Resize(framework::make_ddim({1}));
-  float scale_v = 0.000828f;
-  scale->mutable_data<float>()[0] = scale_v;
-
-  auto bias_var = scope.get()->Var("bias");
-  auto bias = bias_var->template GetMutable<framework::LoDTensor>();
-  SetupTensor<int32_t>(bias, bias_shape, -127, 127);
-
-  auto output_var = scope.get()->Var("output");
-  framework::AttributeMap attrs;
-  attrs["strides"].Set<vector<int>>(std::vector<int>({stride_h, stride_w}));
-  attrs["paddings"].Set<vector<int>>(std::vector<int>({pad_h, pad_w}));
-  attrs["dilations"].Set<vector<int>>(
-      std::vector<int>({dilation_h, dilation_w}));
-  attrs["groups"].Set<int>(1);
-  attrs["axis"].Set<int>(0);
-
-  auto *op = new operators::FusionConvAddReluInt8Op<CPU, T>(
-      "fusion_conv_add_relu_int8", inputs, outputs, attrs, scope);
-  op->InferShape();
-  op->Init();
-  op->Run();
-
-  framework::Tensor output_cmp;
-  output_cmp.mutable_data<T>(output_shape);
-  conv2d<T>(input, filter, bias, attrs, &output_cmp, scale_v);
-
-  // compare results
-  int eq = 0;
-  int neq = 0;
-  auto output = output_var->template Get<framework::LoDTensor>();
-  const T *output_data = output->data<T>();
-  T *output_cmp_data = output_cmp.data<T>();
-  for (int i = 0; i < output->numel(); ++i) {
-    PADDLE_MOBILE_ENFORCE(
-        output_data[i] == output_cmp_data[i],
-        "The execution of test_fusion_conv_add_relu_int8_op is failed!");
-    if (output_data[i] == output_cmp_data[i]) {
-      ++eq;
-    } else {
-      ++neq;
-    }
-  }
-  std::cout << "eq = " << eq << ", neq = " << neq << std::endl;
-  delete op;
-  return 0;
-}
-
-}  // namespace paddle_mobile
-
-int main(int argc, char *argv[]) {
-  if (argc < 5) {
-    LOG(paddle_mobile::kLOG_INFO)
-        << "Usage:\n"
-        << "  ./test-conv-add-relu-int8-op in_channels in_height in_width "
-           "out_channels\n"
-        << "  params:\n"
-        << "   -in_channels: int, input image's channels\n"
-        << "   -in_height: int, input image's height\n"
-        << "   -in_width: int, input image's width\n"
-        << "   -out_channels: int, conv output channels\n";
-    return 1;
-  }
-  int in_channels = atoi(argv[1]);
-  int in_height = atoi(argv[2]);
-  int in_width = atoi(argv[3]);
-  int out_channels = atoi(argv[4]);
-  // kernel = 3, pad = 1, stride = 1
-  LOG(paddle_mobile::kLOG_INFO) << "int8_t, kernel=3, pad=1, stride=1";
-  paddle_mobile::TestConvOp<int8_t, 3, 1, 1>(in_channels, in_height, in_width,
-                                             out_channels);
-  // kernel = 7, pad = 0, stride = 2
-  LOG(paddle_mobile::kLOG_INFO) << "int8, kernel=7, pad=0, stride=2";
-  paddle_mobile::TestConvOp<int8_t, 7, 0, 2>(in_channels, in_height, in_width,
-                                             out_channels);
-  // kernel = 7, pad = 1, stride = 2
-  LOG(paddle_mobile::kLOG_INFO) << "int8, kernel=7, pad=1, stride=2";
-  paddle_mobile::TestConvOp<int8_t, 7, 1, 2>(in_channels, in_height, in_width,
-                                             out_channels);
-  // kernel = 7, pad = 3, stride = 2
-  LOG(paddle_mobile::kLOG_INFO) << "int8, kernel=7, pad=3, stride=2";
-  paddle_mobile::TestConvOp<int8_t, 7, 3, 2>(in_channels, in_height, in_width,
-                                             out_channels);
-  // kernel = 7, pad = 0, stride = 1
-  LOG(paddle_mobile::kLOG_INFO) << "int8, kernel=7, pad=0, stride=1";
-  paddle_mobile::TestConvOp<int8_t, 7, 0, 1>(in_channels, in_height, in_width,
-                                             out_channels);
-  // kernel = 7, pad = 1, stride = 1
-  LOG(paddle_mobile::kLOG_INFO) << "int8, kernel=7, pad=1, stride=1";
-  paddle_mobile::TestConvOp<int8_t, 7, 1, 1>(in_channels, in_height, in_width,
-                                             out_channels);
-  // kernel = 7, pad = 3, stride = 1
-  LOG(paddle_mobile::kLOG_INFO) << "int8, kernel=7, pad=3, stride=1";
-  paddle_mobile::TestConvOp<int8_t, 7, 3, 1>(in_channels, in_height, in_width,
-                                             out_channels);
-  // kernel = 7, pad = 5, stride = 3
-  LOG(paddle_mobile::kLOG_INFO) << "int8, kernel=7, pad=5, stride=3";
-  paddle_mobile::TestConvOp<int8_t, 7, 5, 3>(in_channels, in_height, in_width,
-                                             out_channels);
-  // kernel = 7, pad = 3, stride = 4
-  LOG(paddle_mobile::kLOG_INFO) << "int8, kernel=7, pad=3, stride=4";
-  paddle_mobile::TestConvOp<int8_t, 7, 3, 4>(in_channels, in_height, in_width,
-                                             out_channels);
-  // kernel = 3, pad = 0, stride = 1
-  LOG(paddle_mobile::kLOG_INFO) << "int8, kernel=3, pad=0, stride=1";
-  paddle_mobile::TestConvOp<int8_t, 3, 0, 1>(in_channels, in_height, in_width,
-                                             out_channels);
-  // kernel = 3, pad = 1, stride = 1
-  LOG(paddle_mobile::kLOG_INFO) << "int8, kernel=3, pad=1, stride=1";
-  paddle_mobile::TestConvOp<int8_t, 3, 1, 1>(in_channels, in_height, in_width,
-                                             out_channels);
-
-  // kernel = 5, pad = 0, stride = 1
-  LOG(paddle_mobile::kLOG_INFO) << "int8, kernel=5, pad=0, stride=1";
-  paddle_mobile::TestConvOp<int8_t, 5, 0, 1>(in_channels, in_height, in_width,
-                                             out_channels);
-
-  // kernel = 5, pad = 2, stride = 1
-  LOG(paddle_mobile::kLOG_INFO) << "int8, kernel=5, pad=2, stride=1";
-  paddle_mobile::TestConvOp<int8_t, 5, 2, 1>(in_channels, in_height, in_width,
-                                             out_channels);
-}
-#else
-int main() {
-  std::cout << "FUSION_CONVADDRELU_INT8_OP is not defined!" << std::endl;
-  return 0;
-}
-#endif
--- a/test/operators/test_fusion_fc_op.cpp
+++ b/test/operators/test_fusion_fc_op.cpp
@@ -17,7 +17,6 @@ limitations under the License. */
 #include "../test_helper.h"
 #include "../test_include.h"
 #include "framework/operator.h"
-#include "operators/fusion_fc_int8_op.h"
 #include "operators/fusion_fc_op.h"

 #define a(i, j) a[(i)*lda + (j)]
@@ -103,18 +102,8 @@ int TestFcOP() {
  attrs["y_num_col_dims"].Set<int>(1);
  attrs["axis"].Set<int>(1);
  operators::OperatorBase<CPU> *op = nullptr;
-#ifdef FUSION_FC_INT8_OP
-  if (std::is_same<T, int8_t>::value) {
-    op = new operators::FusionFcInt8Op<CPU, T>("fusion_fc_int8", inputs,
-                                               outputs, attrs, scope);
-  } else {
-    op = new operators::FusionFcOp<CPU, T>("fusion_fc", inputs, outputs, attrs,
-                                           scope);
-  }
-#else
  op = new operators::FusionFcOp<CPU, T>("fusion_fc", inputs, outputs, attrs,
                                         scope);
-#endif
  op->InferShape();
  op->Run();
  auto output = output_var->template Get<framework::LoDTensor>();
@@ -166,9 +155,6 @@ int TestFcOP() {
 int main() {
  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
  paddle_mobile.SetThreadNum(4);
-#ifdef FUSION_FC_INT8_OP
-  paddle_mobile::TestFcOP<int8_t, int32_t>();
-#endif
  paddle_mobile::TestFcOP<float, float>();
  return 0;
 }