im2col_test.cc 9.4 KB
Newer Older
1
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
H
hedaoyuan 已提交
2 3 4 5 6 7 8 9 10 11 12 13 14

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

Y
Yi Wang 已提交
15
#include "paddle/fluid/operators/math/im2col.h"
H
hedaoyuan 已提交
16
#include <gtest/gtest.h>
17
#include <sys/time.h>
A
Abhinav Arora 已提交
18
#include <vector>
19
#include "paddle/fluid/operators/math/im2col_cfo_cpu.h"
H
hedaoyuan 已提交
20

Q
QI JUN 已提交
21
template <typename DeviceContext, typename Place>
H
hedaoyuan 已提交
22 23
void testIm2col() {
  paddle::framework::Tensor input_tmp;
H
hedaoyuan 已提交
24 25 26
  paddle::framework::Tensor input;
  paddle::framework::Tensor output_cfo;
  paddle::framework::Tensor output_ocf;
H
hedaoyuan 已提交
27
  paddle::framework::Tensor output_tmp;
H
hedaoyuan 已提交
28 29 30 31 32 33 34 35 36 37 38 39

  /**
   * input = [0, 1, 2,
   *          3, 4, 5]
   *
   * output_cfo = [0, 1
   *               1, 2
   *               3, 4
   *               4, 5]
   *
   * output_ocf = [0, 1, 3, 4
   *               1, 2, 4, 5]
40 41 42 43 44 45
   *
   * col2im_cfo = [0, 2, 2
   *               3, 4, 5]
   *
   * col2im_ocf = [0, 2, 2
   *               3, 4, 5]
H
hedaoyuan 已提交
46
   */
H
hedaoyuan 已提交
47 48 49
  int input_height = 2;
  int input_width = 3;
  int filter_size = 2;
C
chengduoZH 已提交
50 51 52 53 54 55 56 57
  std::vector<int> stride({1, 1});  // stride_y, stride_x
  std::vector<int> padding(
      {0, 0, 0, 0});                  // up_pad, left_pad, down_pad, right_pad
  std::vector<int> dilation({1, 1});  // dilation_y, dilation_x
  int output_height =
      (input_height - filter_size + padding[0] + padding[1]) / stride[0] + 1;
  int output_width =
      (input_width - filter_size + padding[2] + padding[3]) / stride[1] + 1;
H
hedaoyuan 已提交
58 59
  float* input_ptr = input_tmp.mutable_data<float>(
      {1, input_height, input_width}, paddle::platform::CPUPlace());
H
hedaoyuan 已提交
60 61
  float arr[6] = {0, 1, 2, 3, 4, 5};
  memcpy(input_ptr, arr, 6 * sizeof(float));
H
hedaoyuan 已提交
62 63

  auto* place = new Place();
Q
QI JUN 已提交
64
  DeviceContext* context = new DeviceContext(*place);
H
hedaoyuan 已提交
65 66 67
  if (paddle::platform::is_cpu_place(*place)) {
    input = input_tmp;
  } else {
F
fengjiayi 已提交
68
    TensorCopySync(input_tmp, *place, &input);
H
hedaoyuan 已提交
69
  }
H
hedaoyuan 已提交
70
  output_cfo.mutable_data<float>(
H
hedaoyuan 已提交
71
      {1, filter_size, filter_size, output_height, output_width}, *place);
H
hedaoyuan 已提交
72
  output_ocf.mutable_data<float>(
H
hedaoyuan 已提交
73
      {output_height, output_width, 1, filter_size, filter_size}, *place);
H
hedaoyuan 已提交
74

75
  // Im2Col
H
hedaoyuan 已提交
76
  paddle::operators::math::Im2ColFunctor<
Q
QI JUN 已提交
77
      paddle::operators::math::ColFormat::kCFO, DeviceContext, float>
H
hedaoyuan 已提交
78 79
      im2col;
  paddle::operators::math::Im2ColFunctor<
Q
QI JUN 已提交
80
      paddle::operators::math::ColFormat::kOCF, DeviceContext, float>
H
hedaoyuan 已提交
81 82
      im2col_ocf;

C
chengduoZH 已提交
83 84
  im2col(*context, input, dilation, stride, padding, &output_cfo);
  im2col_ocf(*context, input, dilation, stride, padding, &output_ocf);
85 86 87

  float out_cfo_data[] = {0, 1, 1, 2, 3, 4, 4, 5};
  float out_ocf_data[] = {0, 1, 3, 4, 1, 2, 4, 5};
H
hedaoyuan 已提交
88

H
hedaoyuan 已提交
89 90 91 92
  float* out_cfo_ptr;
  if (paddle::platform::is_cpu_place(*place)) {
    out_cfo_ptr = output_cfo.data<float>();
  } else {
F
fengjiayi 已提交
93
    TensorCopySync(output_cfo, paddle::platform::CPUPlace(), &output_tmp);
H
hedaoyuan 已提交
94 95
    out_cfo_ptr = output_tmp.data<float>();
  }
96 97 98
  for (int i = 0; i < 6; ++i) {
    EXPECT_EQ(out_cfo_ptr[i], out_cfo_data[i]);
  }
H
hedaoyuan 已提交
99

H
hedaoyuan 已提交
100 101 102 103
  float* out_ocf_ptr;
  if (paddle::platform::is_cpu_place(*place)) {
    out_ocf_ptr = output_ocf.data<float>();
  } else {
F
fengjiayi 已提交
104
    TensorCopySync(output_ocf, paddle::platform::CPUPlace(), &output_tmp);
H
hedaoyuan 已提交
105 106
    out_ocf_ptr = output_tmp.data<float>();
  }
Y
Yiqun Liu 已提交
107

108 109 110 111 112 113
  for (int i = 0; i < 6; ++i) {
    EXPECT_EQ(out_ocf_ptr[i], out_ocf_data[i]);
  }

  // Col2Im: kCFO
  paddle::operators::math::Col2ImFunctor<
Q
QI JUN 已提交
114
      paddle::operators::math::ColFormat::kCFO, DeviceContext, float>
115 116
      col2im;
  paddle::operators::math::Col2ImFunctor<
Q
QI JUN 已提交
117
      paddle::operators::math::ColFormat::kOCF, DeviceContext, float>
118 119 120 121 122 123 124
      col2im_ocf;
  float col2im_data[] = {0, 2, 2, 3, 8, 5};

  memset(input_ptr, 0, 6 * sizeof(float));
  if (paddle::platform::is_cpu_place(*place)) {
    input = input_tmp;
  } else {
F
fengjiayi 已提交
125
    TensorCopySync(input_tmp, *place, &input);
126 127
  }

C
chengduoZH 已提交
128
  col2im(*context, output_cfo, dilation, stride, padding, &input);
129 130 131 132 133

  float* in_ptr;
  if (paddle::platform::is_cpu_place(*place)) {
    in_ptr = input.data<float>();
  } else {
F
fengjiayi 已提交
134
    TensorCopySync(input, paddle::platform::CPUPlace(), &input_tmp);
135 136 137 138 139 140 141 142 143 144 145
    in_ptr = input_tmp.data<float>();
  }
  for (int i = 0; i < 6; ++i) {
    EXPECT_EQ(in_ptr[i], col2im_data[i]);
  }

  // Col2Im: kOCF
  memset(input_ptr, 0, 6 * sizeof(float));
  if (paddle::platform::is_cpu_place(*place)) {
    input = input_tmp;
  } else {
F
fengjiayi 已提交
146
    TensorCopySync(input_tmp, *place, &input);
147 148
  }

C
chengduoZH 已提交
149
  col2im_ocf(*context, output_ocf, dilation, stride, padding, &input);
150 151 152 153

  if (paddle::platform::is_cpu_place(*place)) {
    in_ptr = input.data<float>();
  } else {
F
fengjiayi 已提交
154
    TensorCopySync(input, paddle::platform::CPUPlace(), &input_tmp);
155 156 157 158 159
    in_ptr = input_tmp.data<float>();
  }
  for (int i = 0; i < 6; ++i) {
    EXPECT_EQ(in_ptr[i], col2im_data[i]);
  }
Y
Yiqun Liu 已提交
160 161 162

  delete place;
  delete context;
H
hedaoyuan 已提交
163
}
H
hedaoyuan 已提交
164

165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194
// Runs the hand-computed im2col/col2im check on the CPU and, when the build
// defines PADDLE_WITH_CUDA, on the CUDA device as well.
TEST(math, im2col) {
  namespace platform = paddle::platform;

  testIm2col<platform::CPUDeviceContext, platform::CPUPlace>();
#ifdef PADDLE_WITH_CUDA
  testIm2col<platform::CUDADeviceContext, platform::CUDAPlace>();
#endif
}

// Shared CPU fixture for the im2col comparison test and benchmark.
//
// Expects the int names ic, ih, iw, fh, fw, ph and pw to be in scope at the
// expansion site and introduces the following locals:
//   place, context        - CPUPlace and a CPUDeviceContext built from it
//   input                 - {ic, ih, iw} float tensor filled with 1, 2, 3, ...
//   out, ref              - {ic, fh, fw, output_height, output_width} float
//                           tensors (allocated, not initialized here)
//   padding               - {ph, pw}; each value is counted twice when
//                           computing output_height / output_width
//   stride, dilation      - both fixed to {1, 1}
//   im2col                - Im2ColFunctor<kCFO, CPUDeviceContext, float>
//
// The expansion ends on the `im2col` declarator without a semicolon, so the
// use site must supply it (`PREPARE_IM2COL_CPU;`).
#define PREPARE_IM2COL_CPU                                                   \
  paddle::platform::CPUPlace place;                                          \
  paddle::platform::CPUDeviceContext context(place);                         \
  paddle::framework::Tensor input;                                           \
  paddle::framework::Tensor out;                                             \
  paddle::framework::Tensor ref;                                             \
  std::vector<int> padding({ph, pw});                                        \
  std::vector<int> stride({1, 1});                                           \
  std::vector<int> dilation({1, 1});                                         \
  float* input_ptr = input.mutable_data<float>({ic, ih, iw}, place);         \
  for (int i = 0; i < input.numel(); ++i) {                                  \
    input_ptr[i] = static_cast<float>(i + 1);                                \
  }                                                                          \
  int output_height = (ih - fh + padding[0] * 2) / stride[0] + 1;            \
  int output_width = (iw - fw + padding[1] * 2) / stride[1] + 1;             \
  out.mutable_data<float>({ic, fh, fw, output_height, output_width}, place); \
  ref.mutable_data<float>({ic, fh, fw, output_height, output_width}, place); \
  paddle::operators::math::Im2ColFunctor<                                    \
      paddle::operators::math::ColFormat::kCFO,                              \
      paddle::platform::CPUDeviceContext, float>                             \
      im2col

T
tensor-tang 已提交
195
void testIm2colCPU(int ic, int ih, int iw, int fh, int fw, int ph, int pw) {
196 197 198 199 200 201 202 203 204 205
  PREPARE_IM2COL_CPU;

  im2col(context, input, dilation, stride, padding, &out);
  paddle::operators::math::im2col_common<float>(input, dilation, stride,
                                                padding, &ref);

  float* ref_data = ref.data<float>();
  float* out_data = out.data<float>();
  for (int i = 0; i < out.numel(); ++i) {
    EXPECT_EQ(out_data[i], ref_data[i]);
T
tensor-tang 已提交
206
  }
207
}
T
tensor-tang 已提交
208

209 210
void benchIm2col(int ic, int ih, int iw, int fh, int fw, int ph, int pw) {
  PREPARE_IM2COL_CPU;
T
tensor-tang 已提交
211
  constexpr int repeat = 100;
212 213 214 215
  auto GetCurrentMs = []() -> double {
    struct timeval time;
    gettimeofday(&time, NULL);
    return 1e+3 * time.tv_sec + 1e-3 * time.tv_usec;
T
tensor-tang 已提交
216
  };
217 218 219 220 221
  auto t1 = GetCurrentMs();
  for (int i = 0; i < repeat; ++i) {
    im2col(context, input, dilation, stride, padding, &out);
  }
  auto t2 = GetCurrentMs();
T
tensor-tang 已提交
222

223 224 225
  for (int i = 0; i < repeat; ++i) {
    paddle::operators::math::im2col_common<float>(input, dilation, stride,
                                                  padding, &ref);
T
tensor-tang 已提交
226
  }
227 228 229
  auto t3 = GetCurrentMs();

  LOG(INFO) << "before: " << (t3 - t2) / repeat
230 231
            << ",after: " << (t2 - t1) / repeat
            << ",boost: " << ((t3 - t2) / (t2 - t1) - 1) * 100 << "%";
T
tensor-tang 已提交
232 233
}

234
// Compares the CPU Im2ColFunctor with im2col_common over a set of input and
// filter shapes and paddings, then logs timings for a 3x224x224 input.
TEST(math, im2col_cputest) {
  struct Shape {
    int ic;
    int ih;
    int iw;
    int fh;
    int fw;
  };
  const Shape shapes[] = {
      // width == height
      {2, 5, 5, 4, 4},
      {2, 4, 4, 3, 3},
      {2, 4, 4, 2, 2},
      // height != width
      {2, 5, 4, 2, 3},
      {2, 5, 4, 1, 3},
      {2, 4, 5, 3, 1},
      // filter == 1
      {3, 4, 4, 1, 1},
      {3, 3, 4, 1, 1},
  };

  // padding_h == padding_w
  for (int pad = 0; pad < 4; ++pad) {
    for (const Shape& s : shapes) {
      testIm2colCPU(s.ic, s.ih, s.iw, s.fh, s.fw, /*ph*/ pad, /*pw*/ pad);
    }
  }

  // padding_h != padding_w
  testIm2colCPU(/*ic*/ 2, /*ih*/ 4, /*iw*/ 4, /*fh*/ 2, /*fw*/ 3, /*ph*/ 1,
                /*pw*/ 2);

  // benchmark
  for (int pad : {0, 1}) {
    for (int filter : {1, 3, 5}) {
      LOG(INFO) << "padding == " << pad << ", filter == " << filter;
      benchIm2col(/*ic*/ 3, /*ih*/ 224, /*iw*/ 224, /*fh*/ filter,
                  /*fw*/ filter, /*ph*/ pad, /*pw*/ pad);
    }
  }
}