update conv op kernel

781d5fe3 · zhaojiaying01 · d0dc4984 · 781d5fe3
隐藏空白更改
内联并排

Showing 1 changed file with 7 additions and 23 deletions

src/operators/kernel/arm/conv_kernel.cpp src/operators/kernel/arm/conv_kernel.cpp +7 -23

未找到文件。
--- a/src/operators/kernel/arm/conv_kernel.cpp
+++ b/src/operators/kernel/arm/conv_kernel.cpp
@@ -35,14 +35,9 @@ void ConvKernel<CPU, float>::Compute(const ConvParam &param) const {
  LOG(kLOG_DEBUG) << param;
  const Tensor *input = param.Input();
-  // The filter will be reshaped in the calculations,
-  // so here use an assignment operation,
-  // that avoids modifying the variable in the Scope.
  Tensor filter = *param.Filter();
  Tensor *output = param.Output();
-  //            output->mutable_data<T>(context.GetPlace());
+  output->mutable_data<float>();
  int groups = param.Groups();
  std::vector<int> strides = param.Strides();
@@ -53,17 +48,9 @@ void ConvKernel<CPU, float>::Compute(const ConvParam &param) const {
  const int batch_size = static_cast<int>(input->dims()[0]);
-  // filter_shape_vec: {k_o, k_i, k_h, k_w} or {k_o, k_i, k_d, k_h,
-  // k_w}
  std::vector<int64_t> filter_shape_vec(framework::vectorize(filter.dims()));
-  // output_shape_vec: {o_n, o_c, o_h, o_w} or {o_n, o_c, o_d, o_h,
-  // o_w}
  std::vector<int64_t> output_shape_vec(framework::vectorize(output->dims()));
-  // use col_shape in the im2col calculation
-  // col_shape_vec: {i_c/g, k_h, k_w, o_h, o_w} or {i_c/g, k_d, k_h,
-  // k_w, o_d,
-  // o_h, o_w}
  size_t data_dim = filter_shape_vec.size() - 2;
  std::vector<int64_t> col_shape_vec(1 + 2 * data_dim);
  col_shape_vec[0] = input->dims()[1] / groups;
@@ -73,24 +60,19 @@ void ConvKernel<CPU, float>::Compute(const ConvParam &param) const {
  }
  framework::DDim col_shape(framework::make_ddim(col_shape_vec));
-  // use col_matrix_shape in the gemm calculation
-  // size: (i_c/g * k_h * k_w, o_h * o_w) or (i_c/g * k_d * k_h * k_w,
-  // o_d *
-  // o_h * o_w)
  framework::DDim col_matrix_shape =
      framework::flatten_to_2d(col_shape, data_dim + 1);
  bool is_expand = IsExpand(filter_shape_vec, strides, paddings, dilations);
  Tensor col;
-  // col_matrix shares the same piece of data with col,
-  // but will be reshaped into a two-dimensional matrix shape
-  // to call the matrix multiplication interface.
  Tensor col_matrix;
  if (is_expand) {
    col.mutable_data<float>(col_shape);
    col_matrix.ShareDataWith(col);
    col_matrix.Resize(col_matrix_shape);
  }
+  DLOG << " col_shape = " << col_shape;
+  DLOG << " col_matrix_shape = " << col_matrix_shape;
  framework::DDim input_shape = framework::slice_ddim(
      input->dims(), 1, static_cast<int>(input->dims().size()));
@@ -98,6 +80,7 @@ void ConvKernel<CPU, float>::Compute(const ConvParam &param) const {
  framework::DDim filter_matrix_shape = {filter.dims()[0],
                                         filter.numel() / filter.dims()[0]};
  filter.Resize(filter_matrix_shape);
+  DLOG << " filter.deims() = " << filter.dims();
  framework::DDim output_matrix_shape = {
      output->dims()[1],
@@ -110,8 +93,6 @@ void ConvKernel<CPU, float>::Compute(const ConvParam &param) const {
  math::Vol2ColFunctor<CPU, float> vol2col;
  math::Im2ColFunctor<math::ColFormat::kCFO, CPU, float> im2col;
-  //            auto& dev_ctx = context.template
-  //            device_context<DeviceContext>();
  for (int i = 0; i < batch_size; i++) {
    Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape);
    Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape);
@@ -137,6 +118,9 @@ void ConvKernel<CPU, float>::Compute(const ConvParam &param) const {
      // gemm
      Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step);
      Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
+      DLOG << " out_slice " << out_slice.dims();
+      DLOG << " filter_slice " << filter_slice.dims();
+      DLOG << " col_matrix " << col_matrix.dims();
      math::matmul<float>(filter_slice, false, col_matrix, false,
                          static_cast<float>(1), &out_slice,
                          static_cast<float>(0));