im2col.cc 13.5 KB
Newer Older
1
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

Y
Yi Wang 已提交
15
#include "paddle/fluid/operators/math/im2col.h"
#include <cstring>
#include <vector>
H
hedaoyuan 已提交
17 18

namespace paddle {
19
namespace operators {
20
namespace math {
H
hedaoyuan 已提交
21 22

/*
H
hedaoyuan 已提交
23 24 25
 * im = [input_channels, input_height, input_width]
 * col =
 *   [input_channels, filter_height, filter_width, output_height, output_width]
H
hedaoyuan 已提交
26 27
 */
template <class T>
H
hedaoyuan 已提交
28
class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO,
Q
QI JUN 已提交
29
                    platform::CPUDeviceContext, T> {
H
hedaoyuan 已提交
30
 public:
Q
QI JUN 已提交
31
  void operator()(const platform::CPUDeviceContext& context,
C
chengduoZH 已提交
32 33 34
                  const framework::Tensor& im, const std::vector<int>& dilation,
                  const std::vector<int>& stride,
                  const std::vector<int>& padding, framework::Tensor* col) {
H
hedaoyuan 已提交
35
    PADDLE_ENFORCE(im.dims().size() == 3);
C
chengduoZH 已提交
36
    PADDLE_ENFORCE(col->dims().size() == 5);
H
hedaoyuan 已提交
37

C
chengduoZH 已提交
38 39 40
    int im_channels = im.dims()[0];
    int im_height = im.dims()[1];
    int im_width = im.dims()[2];
C
chengduoZH 已提交
41 42
    int filter_height = col->dims()[1];
    int filter_width = col->dims()[2];
T
tensor-tang 已提交
43 44
    int output_height = col->dims()[3];
    int output_width = col->dims()[4];
C
chengduoZH 已提交
45

C
chengduoZH 已提交
46
    int channels_col = im_channels * filter_height * filter_width;
H
hedaoyuan 已提交
47 48

    const T* im_data = im.data<T>();
C
chengduoZH 已提交
49
    T* col_data = col->data<T>();
T
tensor-tang 已提交
50
    // TODO(TJ): change me to template
51
    // further optimize: padding == 1 need special
T
tensor-tang 已提交
52
    if (stride[0] == 1 && stride[1] == 1 && dilation[0] == 1 &&
53
        dilation[1] == 1) {
T
tensor-tang 已提交
54
      int col_matrix_width = output_width * output_height;
T
tensor-tang 已提交
55
      int im_size = im_height * im_width;
56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81
      if (padding[0] == 0 && padding[1] == 0) {
        size_t copy_size = sizeof(T) * output_width;
        for (int oh = 0; oh < output_height; ++oh) {
          const T* im_data_start = im_data + oh * im_width;
          T* dst_data = col_data + oh * output_width;
          for (int ic = 0; ic < im_channels; ++ic) {
            const T* src_data = im_data_start + ic * im_size;
            for (int kh = 0; kh < filter_height; ++kh) {
              for (int kw = 0; kw < filter_width; ++kw) {
                std::memcpy(dst_data, src_data + kw, copy_size);
                dst_data = dst_data + col_matrix_width;
              }
              src_data = src_data + im_width;
            }
          }
        }
        return;
      } else {
        int plh = padding[0];
        // int plw = padding[1];
        int prh =
            (output_height - 1) * stride[0] + filter_height - im_height - plh;
        // int prw =  (output_width - 1) * stride[1] + filter_width - im_width -
        // plw;

        // fill height padding : 0 ~ plh-1, (oh-prh) ~ (oh-1)
T
tensor-tang 已提交
82
        // TODO(TJ): refine ph*xxx
83
        assert(plh == prh);  // because stride_h == 1
T
tensor-tang 已提交
84 85
        int col_block_fh = filter_width * col_matrix_width;  // fw*oh*ow
        int col_block_ic = filter_height * col_block_fh;     // fh*fw*oh*ow
86
        for (int ph = 0; ph < plh; ++ph) {
T
tensor-tang 已提交
87 88 89 90 91
          int sz = output_width * (plh - ph);
          size_t copy_sz = sizeof(T) * sz;
          T* col_start_l = col_data + ph * col_block_fh;
          T* col_start_r = col_data + (filter_height - ph - 1) * col_block_fh +
                           col_matrix_width - sz;
92
          for (int ic = 0; ic < im_channels; ++ic) {
T
tensor-tang 已提交
93 94
            T* dst_data_l = col_start_l + ic * col_block_ic;
            T* dst_data_r = col_start_r + ic * col_block_ic;
T
tensor-tang 已提交
95
            for (int kw = 0; kw < filter_width; ++kw) {
T
tensor-tang 已提交
96 97
              std::memset(dst_data_l, 0, copy_sz);
              std::memset(dst_data_r, 0, copy_sz);
98 99
              dst_data_l = dst_data_l + col_matrix_width;
              dst_data_r = dst_data_r + col_matrix_width;
T
tensor-tang 已提交
100 101 102
            }
          }
        }
103
        return;
T
tensor-tang 已提交
104 105 106
      }
    }

H
hedaoyuan 已提交
107
    for (int c = 0; c < channels_col; ++c) {
C
chengduoZH 已提交
108 109 110
      int w_offset = c % filter_width;
      int h_offset = (c / filter_width) % filter_height;
      int c_im = c / (filter_width * filter_height);
T
tensor-tang 已提交
111
      for (int h = 0; h < output_height; ++h) {
C
chengduoZH 已提交
112
        int im_row_idx = h * stride[0] - padding[0] + h_offset * dilation[0];
T
tensor-tang 已提交
113
        for (int w = 0; w < output_width; ++w) {
C
chengduoZH 已提交
114
          int im_col_idx = w * stride[1] - padding[1] + w_offset * dilation[1];
T
tensor-tang 已提交
115
          int col_idx = (c * output_height + h) * output_width + w;
C
chengduoZH 已提交
116
          int im_idx = (im_row_idx + c_im * im_height) * im_width + im_col_idx;
C
chengduoZH 已提交
117

C
chengduoZH 已提交
118 119 120 121
          col_data[col_idx] = (im_row_idx < 0 || im_row_idx >= im_height ||
                               im_col_idx < 0 || im_col_idx >= im_width)
                                  ? static_cast<T>(0)
                                  : im_data[im_idx];
H
hedaoyuan 已提交
122 123 124 125 126 127 128
        }
      }
    }
  }
};

/*
H
hedaoyuan 已提交
129 130 131
 * im = [input_channels, input_height, input_width]
 * col =
 *   [input_channels, filter_height, filter_width, output_height, output_width]
H
hedaoyuan 已提交
132 133
 */
template <class T>
H
hedaoyuan 已提交
134
class Col2ImFunctor<paddle::operators::math::ColFormat::kCFO,
Q
QI JUN 已提交
135
                    platform::CPUDeviceContext, T> {
H
hedaoyuan 已提交
136
 public:
Q
QI JUN 已提交
137
  void operator()(const platform::CPUDeviceContext& context,
C
chengduoZH 已提交
138 139 140 141 142
                  const framework::Tensor& col,
                  const std::vector<int>& dilation,
                  const std::vector<int>& stride,
                  const std::vector<int>& padding, framework::Tensor* im) {
    PADDLE_ENFORCE(im->dims().size() == 3);
H
hedaoyuan 已提交
143
    PADDLE_ENFORCE(col.dims().size() == 5);
C
chengduoZH 已提交
144 145 146
    int im_channels = im->dims()[0];
    int im_height = im->dims()[1];
    int im_width = im->dims()[2];
H
hedaoyuan 已提交
147 148
    int filter_height = col.dims()[1];
    int filter_width = col.dims()[2];
C
chengduoZH 已提交
149 150
    int col_height = col.dims()[3];
    int col_width = col.dims()[4];
C
chengduoZH 已提交
151

C
chengduoZH 已提交
152 153 154
    PADDLE_ENFORCE_EQ((im_height + padding[0] + padding[2] -
                       ((dilation[0] * (filter_height - 1) + 1))) /
                              stride[0] +
C
chengduoZH 已提交
155 156 157 158
                          1,
                      col_height,
                      "Output_height and padding(padding_up, padding_down) are "
                      "inconsistent.");
C
chengduoZH 已提交
159 160 161
    PADDLE_ENFORCE_EQ((im_width + padding[1] + padding[3] -
                       ((dilation[1] * (filter_width - 1) + 1))) /
                              stride[1] +
C
chengduoZH 已提交
162 163
                          1,
                      col_width,
C
chengduoZH 已提交
164
                      "Output_height and padding(padding_up, padding_down) are "
C
chengduoZH 已提交
165
                      "inconsistent.");
C
chengduoZH 已提交
166

C
chengduoZH 已提交
167
    int channels_col = im_channels * filter_height * filter_width;
H
hedaoyuan 已提交
168

C
chengduoZH 已提交
169
    T* im_data = im->data<T>();
H
hedaoyuan 已提交
170 171 172
    const T* col_data = col.data<T>();

    for (int c = 0; c < channels_col; ++c) {
C
chengduoZH 已提交
173 174 175
      int w_offset = c % filter_width;
      int h_offset = (c / filter_width) % filter_height;
      int c_im = c / (filter_width * filter_height);
C
chengduoZH 已提交
176
      for (int h = 0; h < col_height; ++h) {
C
chengduoZH 已提交
177
        int im_row_idx = h * stride[0] - padding[0] + h_offset * dilation[0];
C
chengduoZH 已提交
178
        for (int w = 0; w < col_width; ++w) {
C
chengduoZH 已提交
179
          int im_col_idx = w * stride[1] - padding[1] + w_offset * dilation[1];
C
chengduoZH 已提交
180 181
          if ((im_row_idx) >= 0 && (im_row_idx) < im_height &&
              (im_col_idx) >= 0 && (im_col_idx) < im_width) {
C
chengduoZH 已提交
182
            im_data[(im_row_idx + c_im * im_height) * im_width + im_col_idx] +=
C
chengduoZH 已提交
183
                col_data[(c * col_height + h) * col_width + w];
H
hedaoyuan 已提交
184 185 186 187 188 189 190
          }
        }
      }
    }
  }
};

H
hedaoyuan 已提交
191
template class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO,
Q
QI JUN 已提交
192
                             platform::CPUDeviceContext, float>;
H
hedaoyuan 已提交
193
template class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO,
Q
QI JUN 已提交
194
                             platform::CPUDeviceContext, double>;
H
hedaoyuan 已提交
195
template class Col2ImFunctor<paddle::operators::math::ColFormat::kCFO,
Q
QI JUN 已提交
196
                             platform::CPUDeviceContext, float>;
H
hedaoyuan 已提交
197
template class Col2ImFunctor<paddle::operators::math::ColFormat::kCFO,
Q
QI JUN 已提交
198
                             platform::CPUDeviceContext, double>;
H
hedaoyuan 已提交
199 200

/*
H
hedaoyuan 已提交
201 202 203
 * im = [input_channels, input_height, input_width]
 * col =
 *   [output_height, output_width, input_channels, filter_height, filter_width]
H
hedaoyuan 已提交
204 205
 */
template <class T>
H
hedaoyuan 已提交
206
class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF,
Q
QI JUN 已提交
207
                    platform::CPUDeviceContext, T> {
H
hedaoyuan 已提交
208
 public:
Q
QI JUN 已提交
209
  void operator()(const platform::CPUDeviceContext& context,
C
chengduoZH 已提交
210 211 212
                  const framework::Tensor& im, const std::vector<int>& dilation,
                  const std::vector<int>& stride,
                  const std::vector<int>& padding, framework::Tensor* col) {
H
hedaoyuan 已提交
213
    PADDLE_ENFORCE(im.dims().size() == 3);
C
chengduoZH 已提交
214
    PADDLE_ENFORCE(col->dims().size() == 5);
C
chengduoZH 已提交
215 216 217
    int im_channels = im.dims()[0];
    int im_height = im.dims()[1];
    int im_width = im.dims()[2];
C
chengduoZH 已提交
218 219 220 221
    int filter_height = col->dims()[3];
    int filter_width = col->dims()[4];
    int col_height = col->dims()[0];
    int col_width = col->dims()[1];
H
hedaoyuan 已提交
222 223

    const T* im_data = im.data<T>();
C
chengduoZH 已提交
224
    T* col_data = col->data<T>();
H
hedaoyuan 已提交
225

C
chengduoZH 已提交
226 227 228
    for (int col_row_idx = 0; col_row_idx < col_height; ++col_row_idx) {
      for (int col_col_idx = 0; col_col_idx < col_width; ++col_col_idx) {
        for (int channel = 0; channel < im_channels; ++channel) {
H
hedaoyuan 已提交
229 230
          for (int filter_row_idx = 0; filter_row_idx < filter_height;
               ++filter_row_idx) {
C
refine  
chengduoZH 已提交
231 232
            int im_row_offset =
                col_row_idx * stride[0] + filter_row_idx - padding[0];
H
hedaoyuan 已提交
233 234 235
            for (int filter_col_idx = 0; filter_col_idx < filter_width;
                 ++filter_col_idx) {
              int im_col_offset =
C
chengduoZH 已提交
236
                  col_col_idx * stride[1] + filter_col_idx - padding[1];
C
refine  
chengduoZH 已提交
237

C
chengduoZH 已提交
238 239 240 241 242 243 244 245 246 247 248 249 250 251 252
              int col_offset =
                  ((((col_row_idx)*col_width + col_col_idx) * im_channels +
                    channel) *
                       filter_height +
                   filter_row_idx) *
                      filter_width +
                  filter_col_idx;

              int im_offset = (channel * im_height + im_row_offset) * im_width +
                              im_col_offset;
              col_data[col_offset] =
                  (im_row_offset < 0 || im_row_offset >= im_height ||
                   im_col_offset < 0 || im_col_offset >= im_width)
                      ? static_cast<T>(0)
                      : im_data[im_offset];
H
hedaoyuan 已提交
253 254 255 256 257 258 259 260 261
            }
          }
        }
      }
    }
  }
};

/*
H
hedaoyuan 已提交
262 263 264
 * im = [input_channels, input_height, input_width]
 * col =
 *   [output_height, output_width, input_channels, filter_height, filter_width]
H
hedaoyuan 已提交
265 266
 */
template <class T>
H
hedaoyuan 已提交
267
class Col2ImFunctor<paddle::operators::math::ColFormat::kOCF,
Q
QI JUN 已提交
268
                    platform::CPUDeviceContext, T> {
H
hedaoyuan 已提交
269
 public:
Q
QI JUN 已提交
270
  void operator()(const platform::CPUDeviceContext& context,
C
chengduoZH 已提交
271 272 273 274 275
                  const framework::Tensor& col,
                  const std::vector<int>& dilation,
                  const std::vector<int>& stride,
                  const std::vector<int>& padding, framework::Tensor* im) {
    PADDLE_ENFORCE(im->dims().size() == 3);
H
hedaoyuan 已提交
276
    PADDLE_ENFORCE(col.dims().size() == 5);
C
chengduoZH 已提交
277 278 279
    int im_channels = im->dims()[0];
    int im_height = im->dims()[1];
    int im_width = im->dims()[2];
H
hedaoyuan 已提交
280 281
    int filter_height = col.dims()[3];
    int filter_width = col.dims()[4];
C
chengduoZH 已提交
282 283
    int col_height = col.dims()[0];
    int col_width = col.dims()[1];
H
hedaoyuan 已提交
284

C
chengduoZH 已提交
285 286 287 288 289 290 291 292 293 294
    PADDLE_ENFORCE_EQ(
        (im_height + padding[0] + padding[2] - filter_height) / stride[0] + 1,
        col_height,
        "Output_height and padding(padding_up, padding_down) are "
        "inconsistent.");
    PADDLE_ENFORCE_EQ(
        (im_width + padding[1] + padding[3] - filter_width) / stride[1] + 1,
        col_width,
        "col_width and padding(padding_left, padding_right) are "
        "inconsistent.");
295

C
chengduoZH 已提交
296
    T* im_data = im->data<T>();
H
hedaoyuan 已提交
297 298
    const T* col_data = col.data<T>();

C
chengduoZH 已提交
299 300 301
    for (int col_row_idx = 0; col_row_idx < col_height; ++col_row_idx) {
      for (int col_col_idx = 0; col_col_idx < col_width; ++col_col_idx) {
        for (int channel = 0; channel < im_channels; ++channel) {
H
hedaoyuan 已提交
302 303
          for (int filter_row_idx = 0; filter_row_idx < filter_height;
               ++filter_row_idx) {
C
refine  
chengduoZH 已提交
304 305
            int im_row_offset =
                col_row_idx * stride[0] + filter_row_idx - padding[0];
H
hedaoyuan 已提交
306 307 308
            for (int filter_col_idx = 0; filter_col_idx < filter_width;
                 ++filter_col_idx) {
              int im_col_offset =
C
chengduoZH 已提交
309
                  col_col_idx * stride[1] + filter_col_idx - padding[1];
C
refine  
chengduoZH 已提交
310

C
chengduoZH 已提交
311 312 313 314 315 316 317
              int col_offset =
                  (((col_row_idx * col_width + col_col_idx) * im_channels +
                    channel) *
                       filter_height +
                   filter_row_idx) *
                      filter_width +
                  filter_col_idx;
C
refine  
chengduoZH 已提交
318

C
chengduoZH 已提交
319 320
              if (im_row_offset >= 0 && im_row_offset < im_height &&
                  im_col_offset >= 0 && im_col_offset < im_width) {
H
hedaoyuan 已提交
321
                int im_offset =
C
chengduoZH 已提交
322
                    (channel * im_height + im_row_offset) * im_width +
H
hedaoyuan 已提交
323 324
                    im_col_offset;
                im_data[im_offset] += col_data[col_offset];
H
hedaoyuan 已提交
325 326 327 328 329 330 331 332 333
              }
            }
          }
        }
      }
    }
  }
};

H
hedaoyuan 已提交
334
template class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF,
Q
QI JUN 已提交
335
                             platform::CPUDeviceContext, float>;
H
hedaoyuan 已提交
336
template class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF,
Q
QI JUN 已提交
337
                             platform::CPUDeviceContext, double>;
H
hedaoyuan 已提交
338
template class Col2ImFunctor<paddle::operators::math::ColFormat::kOCF,
Q
QI JUN 已提交
339
                             platform::CPUDeviceContext, float>;
H
hedaoyuan 已提交
340
template class Col2ImFunctor<paddle::operators::math::ColFormat::kOCF,
Q
QI JUN 已提交
341
                             platform::CPUDeviceContext, double>;
H
hedaoyuan 已提交
342

343
}  // namespace math
344
}  // namespace operators
H
hedaoyuan 已提交
345
}  // namespace paddle