Merge pull request #1191 from hjchen2/dev-latest

Refine: use im2col+gemm to implement int8 3x3 and 5x5 conv, this will…

Merge pull request #1191 from hjchen2/dev-latest
Refine: use im2col+gemm to implement int8 3x3 and 5x5 conv, this will…
a8ce7752 · Ray Liu · GitHub · 39c3f250 · ad8aa3c3 · a8ce7752
5 changed file
--- a/src/operators/kernel/central-arm-func/conv_arm_func.h
+++ b/src/operators/kernel/central-arm-func/conv_arm_func.h
@@ -16,7 +16,6 @@ limitations under the License. */
 #pragma once
 #include <vector>
-#include "operators/math/conv_arm_int8.h"
 #include "operators/math/conv_func.h"
 #include "operators/math/depthwise_conv_3x3.h"
 #include "operators/math/im2col.h"
@@ -28,11 +27,12 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace operators {
-template <typename Dtype>
+template <typename Itype, typename Otype>
 inline void ConvBasic(const ConvParam<CPU> &param) {
  const Tensor *input = param.Input();
  Tensor filter = *param.Filter();
  Tensor *output = param.Output();
+  output->mutable_data<Otype>();
  int groups = param.Groups();
  const std::vector<int> strides = param.Strides();
  const std::vector<int> paddings = param.Paddings();
@@ -60,7 +60,7 @@ inline void ConvBasic(const ConvParam<CPU> &param) {
  Tensor col;
  Tensor col_matrix;
  if (is_expand) {
-    col.mutable_data<Dtype>(col_shape);
+    col.mutable_data<Itype>(col_shape);
    col_matrix.ShareDataWith(col);
    col_matrix.Resize(col_matrix_shape);
  }
@@ -79,8 +79,8 @@ inline void ConvBasic(const ConvParam<CPU> &param) {
  int in_step = static_cast<int>(input->dims()[1]) / groups;
  int out_step = static_cast<int>(output->dims()[1]) / groups;
-  math::Vol2ColFunctor<CPU, Dtype> vol2col;
+  math::Vol2ColFunctor<CPU, Itype> vol2col;
-  math::Im2ColFunctor<math::ColFormat::kCFO, CPU, Dtype> im2col;
+  math::Im2ColFunctor<math::ColFormat::kCFO, CPU, Itype> im2col;
  for (int i = 0; i < batch_size; i++) {
    Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape);
@@ -109,69 +109,18 @@ inline void ConvBasic(const ConvParam<CPU> &param) {
      Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step);
      Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
-      math::matmul<Dtype>(filter_slice, false, col_matrix, false,
+      math::matmul<Itype>(filter_slice, false, col_matrix, false,
                          static_cast<float>(1), &out_slice,
                          static_cast<float>(0));
    }
  }
 }
-inline void ConvCompute_int8(const ConvParam<CPU> &param) {
-  typedef void (*ConvFunc)(const Tensor &input, const Tensor &kernel,
-                           Tensor *output);
-  static ConvFunc conv_funcs_table[7][5] = {
-      {0, 0, 0, 0, 0},                                // k = 1
-      {0, 0, 0, 0, 0}, {conv3x3s1_int8, 0, 0, 0, 0},  // k = 3
-      {0, 0, 0, 0, 0}, {conv5x5s1_int8, 0, 0, 0, 0},  // k = 5
-      {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0},               // k = 7
-  };
-  const Tensor *input = param.Input();
-  Tensor *filter = param.Filter();
-  Tensor *output = param.Output();
-  int groups = param.Groups();
-  const std::vector<int> &strides = param.Strides();
-  const std::vector<int> &paddings = param.Paddings();
-  const std::vector<int> &dilations = param.Dilations();
-  int kernel_h = filter->dims()[2];
-  int kernel_w = filter->dims()[3];
-  output->mutable_data<int32_t>();
-  ConvFunc conv_func = 0;
-  if (strides[1] == strides[0] && strides[1] < 6 && kernel_h == kernel_w &&
-      kernel_h < 8 && groups == 1 && dilations[0] == dilations[1] &&
-      dilations[1] == 1) {
-    conv_func = conv_funcs_table[kernel_h - 1][strides[0] - 1];
-  }
-  if (conv_func) {
-    int batch_size = input->dims()[0];
-    math::PadFunctor<CPU, int8_t> pad;
-    Tensor input_pad;
-    for (int i = 0; i < batch_size; ++i) {
-      Tensor in_batch = input->Slice(i, i + 1);
-      Tensor out_batch = output->Slice(i, i + 1);
-      if (paddings[0] == 0 && paddings[1] == 0) {
-        input_pad = in_batch;
-      } else {
-        framework::DDim pad_shape = in_batch.dims();
-        pad_shape[2] += 2 * paddings[0];
-        pad_shape[3] += 2 * paddings[1];
-        input_pad.mutable_data<int8_t>(pad_shape);
-        pad(in_batch, paddings[0], paddings[1], &input_pad);
-      }
-      conv_func(input_pad, *filter, &out_batch);
-    }
-  } else {
-    ConvBasic<int8_t>(param);
-  }
-}
 template <typename P>
 void ConvCompute(const ConvParam<CPU> &param) {
  if (param.Input()->type() == typeid(int8_t)) {
-    ConvCompute_int8(param);
+    ConvBasic<int8_t, int32_t>(param);
  } else {
-    param.Output()->mutable_data<float>();
    if (param.Groups() == param.Input()->dims()[1] &&
        param.Input()->dims()[1] == param.Output()->dims()[1] &&
        param.Filter()->dims()[2] == param.Filter()->dims()[3] &&
@@ -185,7 +134,7 @@ void ConvCompute(const ConvParam<CPU> &param) {
      math::DepthwiseConv3x3(param.Input(), param.Strides(), param.Paddings(),
                             param.Filter(), nullptr, param.Output(), false);
    } else {
-      ConvBasic<float>(param);
+      ConvBasic<float, float>(param);
    }
  }
 }

--- a/src/operators/kernel/central-arm-func/depthwise_conv_arm_func.h
+++ b/src/operators/kernel/central-arm-func/depthwise_conv_arm_func.h
@@ -44,7 +44,7 @@ void DepthwiseConvCompute(const ConvParam<CPU> &param) {
                                 Bias, false);
  } else {
-    ConvBasic<float>(param);
+    ConvBasic<float, float>(param);
  }
 }

--- a/src/operators/math/conv3x3_arm_int8.cpp
+++ b/src/operators/math/conv3x3_arm_int8.cpp
--- a/src/operators/math/conv5x5_arm_int8.cpp
+++ b/src/operators/math/conv5x5_arm_int8.cpp
--- a/src/operators/math/conv_arm_int8.h
+++ b/src/operators/math/conv_arm_int8.h
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-    http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-#ifdef CONV_OP
-#pragma once
-#include "framework/tensor.h"
-namespace paddle_mobile {
-namespace operators {
-void conv3x3s1_int8(const framework::Tensor& input,
-                    const framework::Tensor& weight, framework::Tensor* output);
-void conv3x3s1_int8_4c(const framework::Tensor& input,
-                       const framework::Tensor& weight,
-                       framework::Tensor* output);
-void conv5x5s1_int8(const framework::Tensor& input,
-                    const framework::Tensor& weight, framework::Tensor* output);
-}  // namespace operators
-}  // namespace paddle_mobile
-#endif