im2col_test.cc 9.4 KB
Newer Older
1
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

Y
Yi Wang 已提交
15
#include "paddle/fluid/operators/math/im2col.h"
#include <gtest/gtest.h>
#include <chrono>
#include <cstring>
#include <vector>
#include "paddle/fluid/operators/math/im2col_cfo_cpu.h"
H
hedaoyuan 已提交
18

Q
QI JUN 已提交
19
template <typename DeviceContext, typename Place>
H
hedaoyuan 已提交
20 21
void testIm2col() {
  paddle::framework::Tensor input_tmp;
H
hedaoyuan 已提交
22 23 24
  paddle::framework::Tensor input;
  paddle::framework::Tensor output_cfo;
  paddle::framework::Tensor output_ocf;
H
hedaoyuan 已提交
25
  paddle::framework::Tensor output_tmp;
H
hedaoyuan 已提交
26 27 28 29 30 31 32 33 34 35 36 37

  /**
   * input = [0, 1, 2,
   *          3, 4, 5]
   *
   * output_cfo = [0, 1
   *               1, 2
   *               3, 4
   *               4, 5]
   *
   * output_ocf = [0, 1, 3, 4
   *               1, 2, 4, 5]
38 39 40 41 42 43
   *
   * col2im_cfo = [0, 2, 2
   *               3, 4, 5]
   *
   * col2im_ocf = [0, 2, 2
   *               3, 4, 5]
H
hedaoyuan 已提交
44
   */
H
hedaoyuan 已提交
45 46 47
  int input_height = 2;
  int input_width = 3;
  int filter_size = 2;
C
chengduoZH 已提交
48 49 50 51 52 53 54 55
  std::vector<int> stride({1, 1});  // stride_y, stride_x
  std::vector<int> padding(
      {0, 0, 0, 0});                  // up_pad, left_pad, down_pad, right_pad
  std::vector<int> dilation({1, 1});  // dilation_y, dilation_x
  int output_height =
      (input_height - filter_size + padding[0] + padding[1]) / stride[0] + 1;
  int output_width =
      (input_width - filter_size + padding[2] + padding[3]) / stride[1] + 1;
H
hedaoyuan 已提交
56 57
  float* input_ptr = input_tmp.mutable_data<float>(
      {1, input_height, input_width}, paddle::platform::CPUPlace());
H
hedaoyuan 已提交
58 59
  float arr[6] = {0, 1, 2, 3, 4, 5};
  memcpy(input_ptr, arr, 6 * sizeof(float));
H
hedaoyuan 已提交
60 61

  auto* place = new Place();
Q
QI JUN 已提交
62
  DeviceContext* context = new DeviceContext(*place);
H
hedaoyuan 已提交
63 64 65
  if (paddle::platform::is_cpu_place(*place)) {
    input = input_tmp;
  } else {
F
fengjiayi 已提交
66
    TensorCopySync(input_tmp, *place, &input);
H
hedaoyuan 已提交
67
  }
H
hedaoyuan 已提交
68
  output_cfo.mutable_data<float>(
H
hedaoyuan 已提交
69
      {1, filter_size, filter_size, output_height, output_width}, *place);
H
hedaoyuan 已提交
70
  output_ocf.mutable_data<float>(
H
hedaoyuan 已提交
71
      {output_height, output_width, 1, filter_size, filter_size}, *place);
H
hedaoyuan 已提交
72

73
  // Im2Col
H
hedaoyuan 已提交
74
  paddle::operators::math::Im2ColFunctor<
Q
QI JUN 已提交
75
      paddle::operators::math::ColFormat::kCFO, DeviceContext, float>
H
hedaoyuan 已提交
76 77
      im2col;
  paddle::operators::math::Im2ColFunctor<
Q
QI JUN 已提交
78
      paddle::operators::math::ColFormat::kOCF, DeviceContext, float>
H
hedaoyuan 已提交
79 80
      im2col_ocf;

C
chengduoZH 已提交
81 82
  im2col(*context, input, dilation, stride, padding, &output_cfo);
  im2col_ocf(*context, input, dilation, stride, padding, &output_ocf);
83 84 85

  float out_cfo_data[] = {0, 1, 1, 2, 3, 4, 4, 5};
  float out_ocf_data[] = {0, 1, 3, 4, 1, 2, 4, 5};
H
hedaoyuan 已提交
86

H
hedaoyuan 已提交
87 88 89 90
  float* out_cfo_ptr;
  if (paddle::platform::is_cpu_place(*place)) {
    out_cfo_ptr = output_cfo.data<float>();
  } else {
F
fengjiayi 已提交
91
    TensorCopySync(output_cfo, paddle::platform::CPUPlace(), &output_tmp);
H
hedaoyuan 已提交
92 93
    out_cfo_ptr = output_tmp.data<float>();
  }
94 95 96
  for (int i = 0; i < 6; ++i) {
    EXPECT_EQ(out_cfo_ptr[i], out_cfo_data[i]);
  }
H
hedaoyuan 已提交
97

H
hedaoyuan 已提交
98 99 100 101
  float* out_ocf_ptr;
  if (paddle::platform::is_cpu_place(*place)) {
    out_ocf_ptr = output_ocf.data<float>();
  } else {
F
fengjiayi 已提交
102
    TensorCopySync(output_ocf, paddle::platform::CPUPlace(), &output_tmp);
H
hedaoyuan 已提交
103 104
    out_ocf_ptr = output_tmp.data<float>();
  }
Y
Yiqun Liu 已提交
105

106 107 108 109 110 111
  for (int i = 0; i < 6; ++i) {
    EXPECT_EQ(out_ocf_ptr[i], out_ocf_data[i]);
  }

  // Col2Im: kCFO
  paddle::operators::math::Col2ImFunctor<
Q
QI JUN 已提交
112
      paddle::operators::math::ColFormat::kCFO, DeviceContext, float>
113 114
      col2im;
  paddle::operators::math::Col2ImFunctor<
Q
QI JUN 已提交
115
      paddle::operators::math::ColFormat::kOCF, DeviceContext, float>
116 117 118 119 120 121 122
      col2im_ocf;
  float col2im_data[] = {0, 2, 2, 3, 8, 5};

  memset(input_ptr, 0, 6 * sizeof(float));
  if (paddle::platform::is_cpu_place(*place)) {
    input = input_tmp;
  } else {
F
fengjiayi 已提交
123
    TensorCopySync(input_tmp, *place, &input);
124 125
  }

C
chengduoZH 已提交
126
  col2im(*context, output_cfo, dilation, stride, padding, &input);
127 128 129 130 131

  float* in_ptr;
  if (paddle::platform::is_cpu_place(*place)) {
    in_ptr = input.data<float>();
  } else {
F
fengjiayi 已提交
132
    TensorCopySync(input, paddle::platform::CPUPlace(), &input_tmp);
133 134 135 136 137 138 139 140 141 142 143
    in_ptr = input_tmp.data<float>();
  }
  for (int i = 0; i < 6; ++i) {
    EXPECT_EQ(in_ptr[i], col2im_data[i]);
  }

  // Col2Im: kOCF
  memset(input_ptr, 0, 6 * sizeof(float));
  if (paddle::platform::is_cpu_place(*place)) {
    input = input_tmp;
  } else {
F
fengjiayi 已提交
144
    TensorCopySync(input_tmp, *place, &input);
145 146
  }

C
chengduoZH 已提交
147
  col2im_ocf(*context, output_ocf, dilation, stride, padding, &input);
148 149 150 151

  if (paddle::platform::is_cpu_place(*place)) {
    in_ptr = input.data<float>();
  } else {
F
fengjiayi 已提交
152
    TensorCopySync(input, paddle::platform::CPUPlace(), &input_tmp);
153 154 155 156 157
    in_ptr = input_tmp.data<float>();
  }
  for (int i = 0; i < 6; ++i) {
    EXPECT_EQ(in_ptr[i], col2im_data[i]);
  }
Y
Yiqun Liu 已提交
158 159 160

  delete place;
  delete context;
H
hedaoyuan 已提交
161
}
H
hedaoyuan 已提交
162

163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192
// Runs the Im2Col/Col2Im round-trip check on the CPU and, when the build has
// CUDA enabled, on the GPU as well.
TEST(math, im2col) {
  namespace platform = paddle::platform;
  testIm2col<platform::CPUDeviceContext, platform::CPUPlace>();
#ifdef PADDLE_WITH_CUDA
  testIm2col<platform::CUDADeviceContext, platform::CUDAPlace>();
#endif
}

// Shared fixture for the CPU-only im2col helpers below.
//
// Must be expanded in a scope where the ints ic, ih, iw (input channels,
// height, width), fh, fw (filter height, width) and ph, pw (padding height,
// width) are visible. It declares, in that scope:
//   - place / context : a CPUPlace and a CPUDeviceContext built from it;
//   - input           : an {ic, ih, iw} float tensor filled with 1, 2, 3, ...;
//   - padding         : {ph, pw}; stride and dilation are both {1, 1};
//   - output_height / output_width : the output extent computed with the
//     padding applied on both sides of each dimension;
//   - out, ref        : {ic, fh, fw, output_height, output_width} float
//     tensors for the functor result and the reference result;
//   - im2col          : a kCFO CPU Im2ColFunctor<float>.
// The expansion ends on the `im2col` declarator without a semicolon, so the
// use site supplies it (`PREPARE_IM2COL_CPU;`).
#define PREPARE_IM2COL_CPU                                                   \
  paddle::platform::CPUPlace place;                                          \
  paddle::platform::CPUDeviceContext context(place);                         \
  paddle::framework::Tensor input;                                           \
  paddle::framework::Tensor out;                                             \
  paddle::framework::Tensor ref;                                             \
  std::vector<int> padding({ph, pw});                                        \
  std::vector<int> stride({1, 1});                                           \
  std::vector<int> dilation({1, 1});                                         \
  float* input_ptr = input.mutable_data<float>({ic, ih, iw}, place);         \
  for (int i = 0; i < input.numel(); ++i) {                                  \
    input_ptr[i] = static_cast<float>(i + 1);                                \
  }                                                                          \
  int output_height = (ih - fh + padding[0] * 2) / stride[0] + 1;            \
  int output_width = (iw - fw + padding[1] * 2) / stride[1] + 1;             \
  out.mutable_data<float>({ic, fh, fw, output_height, output_width}, place); \
  ref.mutable_data<float>({ic, fh, fw, output_height, output_width}, place); \
  paddle::operators::math::Im2ColFunctor<                                    \
      paddle::operators::math::ColFormat::kCFO,                              \
      paddle::platform::CPUDeviceContext, float>                             \
      im2col

T
tensor-tang 已提交
193
void testIm2colCPU(int ic, int ih, int iw, int fh, int fw, int ph, int pw) {
194 195 196 197 198 199 200 201 202 203
  PREPARE_IM2COL_CPU;

  im2col(context, input, dilation, stride, padding, &out);
  paddle::operators::math::im2col_common<float>(input, dilation, stride,
                                                padding, &ref);

  float* ref_data = ref.data<float>();
  float* out_data = out.data<float>();
  for (int i = 0; i < out.numel(); ++i) {
    EXPECT_EQ(out_data[i], ref_data[i]);
T
tensor-tang 已提交
204
  }
205
}
T
tensor-tang 已提交
206

207 208
void benchIm2col(int ic, int ih, int iw, int fh, int fw, int ph, int pw) {
  PREPARE_IM2COL_CPU;
T
tensor-tang 已提交
209
  constexpr int repeat = 100;
210 211 212 213
  auto GetCurrentMs = []() -> double {
    struct timeval time;
    gettimeofday(&time, NULL);
    return 1e+3 * time.tv_sec + 1e-3 * time.tv_usec;
T
tensor-tang 已提交
214
  };
215 216 217 218 219
  auto t1 = GetCurrentMs();
  for (int i = 0; i < repeat; ++i) {
    im2col(context, input, dilation, stride, padding, &out);
  }
  auto t2 = GetCurrentMs();
T
tensor-tang 已提交
220

221 222 223
  for (int i = 0; i < repeat; ++i) {
    paddle::operators::math::im2col_common<float>(input, dilation, stride,
                                                  padding, &ref);
T
tensor-tang 已提交
224
  }
225 226 227
  auto t3 = GetCurrentMs();

  LOG(INFO) << "before: " << (t3 - t2) / repeat
228 229
            << ",after: " << (t2 - t1) / repeat
            << ",boost: " << ((t3 - t2) / (t2 - t1) - 1) * 100 << "%";
T
tensor-tang 已提交
230 231
}

232
// Compares the CPU Im2Col functor against the reference implementation over a
// table of input/filter shapes and paddings, then logs a small benchmark.
TEST(math, im2col_cputest) {
  // Each row is {ic, ih, iw, fh, fw}.
  const int shapes[][5] = {
      // width == height
      {2, 5, 5, 4, 4},
      {2, 4, 4, 3, 3},
      {2, 4, 4, 2, 2},
      // height != width
      {2, 5, 4, 2, 3},
      {2, 5, 4, 1, 3},
      {2, 4, 5, 3, 1},
      // filter == 1
      {3, 4, 4, 1, 1},
      {3, 3, 4, 1, 1},
  };

  // padding_h == padding_w
  for (int pad = 0; pad < 4; ++pad) {
    for (const auto& shape : shapes) {
      testIm2colCPU(/*ic*/ shape[0], /*ih*/ shape[1], /*iw*/ shape[2],
                    /*fh*/ shape[3], /*fw*/ shape[4], /*ph*/ pad, /*pw*/ pad);
    }
  }

  // padding_h != padding_w
  testIm2colCPU(/*ic*/ 2, /*ih*/ 4, /*iw*/ 4, /*fh*/ 2, /*fw*/ 3, /*ph*/ 1,
                /*pw*/ 2);

  // benchmark
  const int bench_paddings[] = {0, 1};
  const int bench_filters[] = {1, 3, 5};
  for (int p : bench_paddings) {
    for (int k : bench_filters) {
      LOG(INFO) << "padding == " << p << ", filter == " << k;
      benchIm2col(/*ic*/ 3, /*ih*/ 224, /*iw*/ 224, /*fh*/ k, /*fw*/ k,
                  /*ph*/ p, /*pw*/ p);
    }
  }
}