diff --git a/paddle/fluid/operators/math/im2col.cc b/paddle/fluid/operators/math/im2col.cc index 90783ba1c6fd4dd0e9e7d6fdb70fd5e3c2a230c8..c29a1373194d6efe6c227e9ed57ce042e64713d6 100644 --- a/paddle/fluid/operators/math/im2col.cc +++ b/paddle/fluid/operators/math/im2col.cc @@ -72,11 +72,11 @@ class Im2ColFunctor(0); // padding zero + for (int ic = 0; ic < im_channels; ++ic) { + // TODO(TJ): use add and resue stride + T* dst_data_ic = col_data + ic * col_block_ic; + for (int kh = 0; kh < filter_height; ++kh) { + T* dst_data_kh = dst_data_ic + kh * col_block_fh; + for (T* dst_data : + {dst_data_kh, dst_data_kh + + (filter_width - prw) * col_matrix_width + + output_width - 1}) { + // TODO(TJ): from plh, saving repeated assignment + for (int oh = 0; oh < output_height; ++oh) { + *dst_data = pad; + dst_data = dst_data + output_width; + } + } + } + } + } else { + // padding_size > 1 + for (int ic = 0; ic < im_channels; ++ic) { + // TODO(TJ): use add and resue stride + T* dst_data_ic = + col_data + ic * filter_width * filter_height * col_matrix_width; + for (int kh = 0; kh < filter_height; ++kh) { + T* dst_data_kh = + dst_data_ic + kh * filter_width * col_matrix_width; + for (int kw = 0; kw < plw; ++kw) { + // TODO(TJ): reuse array outside this for + size_t sz = sizeof(T) * (plw - kw); + T* dst_data = dst_data_kh + kw * col_matrix_width; + // TODO(TJ): from plh, saving repeated assignment + for (int oh = 0; oh < output_height; ++oh) { + std::memset(dst_data, 0, sz); + dst_data = dst_data + output_width; + } + } + // TODO(TJ): use reverse to save cache + for (int kw = 0; kw < prw; ++kw) { + // TODO(TJ): reuse array outside this for + auto num = (prw - kw); + size_t sz = sizeof(T) * num; + T* dst_data = dst_data_kh + + (filter_width - 1 - kw) * col_matrix_width + + output_width - num; + // TODO(TJ): from plh, saving repeated assignment + for (int oh = 0; oh < output_height; ++oh) { + std::memset(dst_data, 0, sz); + dst_data = dst_data + output_width; + } + } + } + } + } return; } }