Refine: use im2col+gemm to implement int8 3x3 and 5x5 conv, which gives higher performance

704c1148 · hjchen2 · 492d60a5 · 704c1148 · 704c1148
隐藏空白更改
内联并排

Showing 2 changed files with 9 additions and 8 deletions

src/operators/kernel/central-arm-func/conv_arm_func.h src/operators/kernel/central-arm-func/conv_arm_func.h +9 -4

src/operators/math/conv_arm_int8.h src/operators/math/conv_arm_int8.h +0 -4

未找到文件。
--- a/src/operators/kernel/central-arm-func/conv_arm_func.h
+++ b/src/operators/kernel/central-arm-func/conv_arm_func.h
@@ -120,10 +120,15 @@ inline void ConvCompute_int8(const ConvParam<CPU> &param) {
  typedef void (*ConvFunc)(const Tensor &input, const Tensor &kernel,
                           Tensor *output);
  static ConvFunc conv_funcs_table[7][5] = {
-      {0, 0, 0, 0, 0},                                // k = 1
-      {0, 0, 0, 0, 0}, {conv3x3s1_int8, 0, 0, 0, 0},  // k = 3
-      {0, 0, 0, 0, 0}, {conv5x5s1_int8, 0, 0, 0, 0},  // k = 5
-      {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0},               // k = 7
+      {0, 0, 0, 0, 0},  // k = 1
+      //      {0, 0, 0, 0, 0}, {conv3x3s1_int8, 0, 0, 0, 0},  // k = 3
+      //      {0, 0, 0, 0, 0}, {conv5x5s1_int8, 0, 0, 0, 0},  // k = 5
+      {0, 0, 0, 0, 0},
+      {0, 0, 0, 0, 0},  // k = 3
+      {0, 0, 0, 0, 0},
+      {0, 0, 0, 0, 0},  // k = 5
+      {0, 0, 0, 0, 0},
+      {0, 0, 0, 0, 0},  // k = 7
  };
  const Tensor *input = param.Input();
  Tensor *filter = param.Filter();

--- a/src/operators/math/conv_arm_int8.h
+++ b/src/operators/math/conv_arm_int8.h
@@ -24,10 +24,6 @@ namespace operators {
 void conv3x3s1_int8(const framework::Tensor& input,
                    const framework::Tensor& weight, framework::Tensor* output);

-void conv3x3s1_int8_4c(const framework::Tensor& input,
-                       const framework::Tensor& weight,
-                       framework::Tensor* output);
-
 void conv5x5s1_int8(const framework::Tensor& input,
                    const framework::Tensor& weight, framework::Tensor* output);