From 781d5fe36280f300313b28f671b85e8ea9cc83ee Mon Sep 17 00:00:00 2001
From: zhaojiaying01
Date: Sat, 26 May 2018 17:34:00 +0800
Subject: [PATCH] update conv op kernel

---
 src/operators/kernel/arm/conv_kernel.cpp | 30 ++++++------------------
 1 file changed, 7 insertions(+), 23 deletions(-)

diff --git a/src/operators/kernel/arm/conv_kernel.cpp b/src/operators/kernel/arm/conv_kernel.cpp
index 03558141f9..c8ac141f9c 100644
--- a/src/operators/kernel/arm/conv_kernel.cpp
+++ b/src/operators/kernel/arm/conv_kernel.cpp
@@ -35,14 +35,9 @@ void ConvKernel<CPU, float>::Compute(const ConvParam &param) const {
   LOG(kLOG_DEBUG) << param;
 
   const Tensor *input = param.Input();
-
-  // The filter will be reshaped in the calculations,
-  // so here use an assignment operation,
-  // that avoids modifying the variable in the Scope.
   Tensor filter = *param.Filter();
-
   Tensor *output = param.Output();
-  // output->mutable_data<T>(context.GetPlace());
+  output->mutable_data<float>();
 
   int groups = param.Groups();
   std::vector<int> strides = param.Strides();
@@ -53,17 +48,9 @@ void ConvKernel<CPU, float>::Compute(const ConvParam &param) const {
 
   const int batch_size = static_cast<int>(input->dims()[0]);
 
-  // filter_shape_vec: {k_o, k_i, k_h, k_w} or {k_o, k_i, k_d, k_h,
-  // k_w}
   std::vector<int64_t> filter_shape_vec(framework::vectorize(filter.dims()));
-  // output_shape_vec: {o_n, o_c, o_h, o_w} or {o_n, o_c, o_d, o_h,
-  // o_w}
   std::vector<int64_t> output_shape_vec(framework::vectorize(output->dims()));
 
-  // use col_shape in the im2col calculation
-  // col_shape_vec: {i_c/g, k_h, k_w, o_h, o_w} or {i_c/g, k_d, k_h,
-  // k_w, o_d,
-  // o_h, o_w}
   size_t data_dim = filter_shape_vec.size() - 2;
   std::vector<int64_t> col_shape_vec(1 + 2 * data_dim);
   col_shape_vec[0] = input->dims()[1] / groups;
@@ -73,24 +60,19 @@ void ConvKernel<CPU, float>::Compute(const ConvParam &param) const {
   }
   framework::DDim col_shape(framework::make_ddim(col_shape_vec));
 
-  // use col_matrix_shape in the gemm calculation
-  // size: (i_c/g * k_h * k_w, o_h * o_w) or (i_c/g * k_d * k_h * k_w,
-  // o_d *
-  // o_h * o_w)
   framework::DDim col_matrix_shape =
       framework::flatten_to_2d(col_shape, data_dim + 1);
 
   bool is_expand = IsExpand(filter_shape_vec, strides, paddings, dilations);
   Tensor col;
-  // col_matrix shares the same piece of data with col,
-  // but will be reshaped into a two-dimensional matrix shape
-  // to call the matrix multiplication interface.
   Tensor col_matrix;
   if (is_expand) {
     col.mutable_data<float>(col_shape);
     col_matrix.ShareDataWith(col);
     col_matrix.Resize(col_matrix_shape);
   }
+  DLOG << " col_shape = " << col_shape;
+  DLOG << " col_matrix_shape = " << col_matrix_shape;
 
   framework::DDim input_shape = framework::slice_ddim(
       input->dims(), 1, static_cast<int>(input->dims().size()));
@@ -98,6 +80,7 @@ void ConvKernel<CPU, float>::Compute(const ConvParam &param) const {
   framework::DDim filter_matrix_shape = {filter.dims()[0],
                                          filter.numel() / filter.dims()[0]};
   filter.Resize(filter_matrix_shape);
+  DLOG << " filter.dims() = " << filter.dims();
 
   framework::DDim output_matrix_shape = {
       output->dims()[1],
@@ -110,8 +93,6 @@ void ConvKernel<CPU, float>::Compute(const ConvParam &param) const {
   math::Vol2ColFunctor<CPU, float> vol2col;
   math::Im2ColFunctor<math::ColFormat::kCFO, CPU, float> im2col;
 
-  // auto& dev_ctx = context.template
-  // device_context<DeviceContext>();
   for (int i = 0; i < batch_size; i++) {
     Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape);
     Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape);
@@ -137,6 +118,9 @@ void ConvKernel<CPU, float>::Compute(const ConvParam &param) const {
       // gemm
       Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step);
       Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
+      DLOG << " out_slice " << out_slice.dims();
+      DLOG << " filter_slice " << filter_slice.dims();
+      DLOG << " col_matrix " << col_matrix.dims();
       math::matmul<float>(filter_slice, false, col_matrix, false,
                           static_cast<float>(1), &out_slice,
                           static_cast<float>(0));
--
GitLab
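Note (not part of the patch above): the kernel keeps the usual im2col + GEMM scheme, so for reference here is a minimal, self-contained C++ sketch of that scheme for one image with stride 1, no padding, and groups = 1. All names and sizes below are hypothetical illustrations; the real kernel uses paddle-mobile's math::Im2ColFunctor / math::Vol2ColFunctor and math::matmul rather than these hand-written loops.

// Illustrative sketch only: im2col followed by a plain GEMM.
#include <cstdio>
#include <vector>

// Unfold (ic, kh, kw) patches into an (ic*kh*kw) x (oh*ow) column matrix.
std::vector<float> im2col(const std::vector<float> &in, int ic, int ih, int iw,
                          int kh, int kw, int oh, int ow) {
  std::vector<float> col(static_cast<size_t>(ic) * kh * kw * oh * ow);
  for (int c = 0; c < ic; ++c)
    for (int p = 0; p < kh; ++p)
      for (int q = 0; q < kw; ++q)
        for (int y = 0; y < oh; ++y)
          for (int x = 0; x < ow; ++x) {
            int row = (c * kh + p) * kw + q;  // row in the col matrix
            int idx = y * ow + x;             // column in the col matrix
            col[row * (oh * ow) + idx] =
                in[(c * ih + (y + p)) * iw + (x + q)];
          }
  return col;
}

int main() {
  // Hypothetical sizes: 2 input channels, 4x4 image, 3 output channels, 3x3 kernel.
  const int ic = 2, ih = 4, iw = 4, oc = 3, kh = 3, kw = 3;
  const int oh = ih - kh + 1, ow = iw - kw + 1;  // stride 1, no padding

  std::vector<float> input(ic * ih * iw, 1.0f);        // all ones
  std::vector<float> filter(oc * ic * kh * kw, 0.5f);  // all 0.5

  // col: (ic*kh*kw) x (oh*ow); filter viewed as oc x (ic*kh*kw).
  std::vector<float> col = im2col(input, ic, ih, iw, kh, kw, oh, ow);
  std::vector<float> output(oc * oh * ow, 0.0f);

  // GEMM: output = filter_matrix * col, the role math::matmul plays in the kernel.
  const int k = ic * kh * kw, n = oh * ow;
  for (int m = 0; m < oc; ++m)
    for (int j = 0; j < n; ++j) {
      float acc = 0.0f;
      for (int t = 0; t < k; ++t) acc += filter[m * k + t] * col[t * n + j];
      output[m * n + j] = acc;
    }

  // Every output element should be ic*kh*kw * 1.0 * 0.5 = 9.0.
  printf("output[0] = %.1f (expected %.1f)\n", output[0], ic * kh * kw * 0.5f);
  return 0;
}

The col matrix has one row per (input channel, kernel offset) pair and one column per output position, which is why the kernel's col_matrix_shape is (i_c/g * k_h * k_w, o_h * o_w) and the GEMM against the (o_c, i_c/g * k_h * k_w) filter matrix produces the (o_c, o_h * o_w) output slice for each group.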