From d2c1408f91b812332f781ec6be51edb855678fa6 Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Mon, 16 Oct 2017 19:16:08 +0800 Subject: [PATCH] fix im2col kocf for sequence projection --- paddle/operators/math/im2col.cc | 70 +++++++++++++++------- paddle/operators/math/im2col.cu | 54 +++++++++++++---- paddle/operators/math/im2col_test.cc | 89 ++++++++++++++++++++++------ 3 files changed, 162 insertions(+), 51 deletions(-) diff --git a/paddle/operators/math/im2col.cc b/paddle/operators/math/im2col.cc index c08a3380f0..729ba8665c 100644 --- a/paddle/operators/math/im2col.cc +++ b/paddle/operators/math/im2col.cc @@ -140,8 +140,8 @@ class Im2ColFunctor= down_pad) { + row_begin = 0; + } else { + row_begin = down_pad - up_pad; + } + row_end = row_begin + ((input_height + up_pad + down_pad - filter_height) / + stride_height + + 1); + const T* im_data = im.data(); T* col_data = col.data(); - for (int col_row_idx = 0; col_row_idx < output_height; ++col_row_idx) { + for (int col_row_idx = row_begin; col_row_idx < row_end; ++col_row_idx) { for (int col_col_idx = 0; col_col_idx < output_width; ++col_col_idx) { for (int channel = 0; channel < input_channels; ++channel) { for (int filter_row_idx = 0; filter_row_idx < filter_height; @@ -166,13 +178,14 @@ class Im2ColFunctor= input_height || im_col_offset < 0 || im_col_offset >= input_width) { col_data[col_offset] = T(0); @@ -201,7 +214,7 @@ class Col2ImFunctor= down_pad) { + row_begin = 0; + } else { + row_begin = down_pad - up_pad; + } + row_end = row_begin + ((input_height + up_pad + down_pad - filter_height) / + stride_height + + 1); + T* im_data = im.data(); const T* col_data = col.data(); - for (int col_row_idx = 0; col_row_idx < output_height; ++col_row_idx) { + for (int col_row_idx = row_begin; col_row_idx < row_end; ++col_row_idx) { for (int col_col_idx = 0; col_col_idx < output_width; ++col_col_idx) { for (int channel = 0; channel < input_channels; ++channel) { for (int filter_row_idx = 0; filter_row_idx < filter_height; ++filter_row_idx) { for (int filter_col_idx = 0; filter_col_idx < filter_width; ++filter_col_idx) { - int im_row_offset = + int im_row_offset = // change or not ??? col_row_idx * stride_height + filter_row_idx - padding_height; int im_col_offset = col_col_idx * stride_width + filter_col_idx - padding_width; - int col_offset = (((col_row_idx * output_width + col_col_idx) * - input_channels + - channel) * - filter_height + - filter_row_idx) * - filter_width + - filter_col_idx; + int col_offset = + ((((col_row_idx - row_begin) * output_width + col_col_idx) * + input_channels + + channel) * + filter_height + + filter_row_idx) * + filter_width + + filter_col_idx; if (im_row_offset >= 0 && im_row_offset < input_height && im_col_offset >= 0 && im_col_offset < input_width) { int im_offset = diff --git a/paddle/operators/math/im2col.cu b/paddle/operators/math/im2col.cu index 01f60bfe70..2416758629 100644 --- a/paddle/operators/math/im2col.cu +++ b/paddle/operators/math/im2col.cu @@ -199,7 +199,8 @@ __global__ void im2colOCF(const T* im_data, T* col_data, int input_channels, int input_height, int input_width, int filter_height, int filter_width, int stride_height, int stride_width, int padding_height, int padding_width, - int output_height, int output_width) { + int output_height, int output_width, int row_begin, + int row_end) { int swid = blockIdx.x; int shid = blockIdx.y; for (int channelid = threadIdx.z; channelid < input_channels; @@ -207,7 +208,8 @@ __global__ void im2colOCF(const T* im_data, T* col_data, int input_channels, for (int idy = threadIdx.y; idy < filter_height; idy += blockDim.y) { for (int idx = threadIdx.x; idx < filter_width; idx += blockDim.x) { int width_offset = idx + swid * stride_width - padding_width; - int height_offset = idy + shid * stride_height - padding_height; + int height_offset = + idy + (shid + row_begin) * stride_height - padding_height; int im_offset = width_offset + height_offset * input_width + channelid * input_height * input_width; @@ -238,8 +240,8 @@ class Im2ColFunctor= down_pad) { + row_begin = 0; + } else { + row_begin = down_pad - up_pad; + } + row_end = row_begin + ((input_height + up_pad + down_pad - filter_height) / + stride_height + + 1); + + int output_height = row_end - row_begin; // col.dims()[0]; int output_width = col.dims()[1]; int block_dim_x = 0; @@ -275,7 +290,8 @@ class Im2ColFunctor>>( im.data(), col.data(), input_channels, input_height, input_width, filter_height, filter_width, stride_height, stride_width, - padding_height, padding_width, output_height, output_width); + padding_height, padding_width, output_height, output_width, row_begin, + row_end); } }; @@ -284,7 +300,8 @@ __global__ void col2imOCF(T* im_data, const T* col_data, int input_channels, int input_height, int input_width, int filter_height, int filter_width, int stride_height, int stride_width, int padding_height, int padding_width, - int output_height, int output_width) { + int output_height, int output_width, int row_begin, + int row_end) { int swid = blockIdx.x; int shid = blockIdx.y; for (int channelid = threadIdx.z; channelid < input_channels; @@ -292,7 +309,8 @@ __global__ void col2imOCF(T* im_data, const T* col_data, int input_channels, for (int idy = threadIdx.y; idy < filter_height; idy += blockDim.y) { for (int idx = threadIdx.x; idx < filter_width; idx += blockDim.x) { int width_offset = idx + swid * stride_width - padding_width; - int height_offset = idy + shid * stride_height - padding_height; + int height_offset = + idy + (shid + row_begin) * stride_height - padding_height; int im_offset = width_offset + height_offset * input_width + channelid * input_height * input_width; @@ -322,7 +340,7 @@ class Col2ImFunctor= down_pad) { + row_begin = 0; + } else { + row_begin = down_pad - up_pad; + } + row_end = row_begin + ((input_height + up_pad + down_pad - filter_height) / + stride_height + + 1); + + int output_height = row_end - row_begin; // col.dims()[0]; int output_width = col.dims()[1]; int block_dim_x = 0; @@ -358,7 +389,8 @@ class Col2ImFunctor>>( im.data(), col.data(), input_channels, input_height, input_width, filter_height, filter_width, stride_height, stride_width, - padding_height, padding_width, output_height, output_width); + padding_height, padding_width, output_height, output_width, row_begin, + row_end); } }; diff --git a/paddle/operators/math/im2col_test.cc b/paddle/operators/math/im2col_test.cc index 9c506ae89b..6406d43a9b 100644 --- a/paddle/operators/math/im2col_test.cc +++ b/paddle/operators/math/im2col_test.cc @@ -35,6 +35,12 @@ void testIm2col() { * * output_ocf = [0, 1, 3, 4 * 1, 2, 4, 5] + * + * col2im_cfo = [0, 2, 2 + * 3, 4, 5] + * + * col2im_ocf = [0, 2, 2 + * 3, 4, 5] */ int input_height = 2; int input_width = 3; @@ -59,7 +65,7 @@ void testIm2col() { new paddle::platform::CUDADeviceContext(paddle::platform::GPUPlace()); #else PADDLE_THROW("no GPU support"); -#endif // PADDLE_ONLY_CPU +#endif // PADDLE_WITH_CUDA } if (paddle::platform::is_cpu_place(*place)) { input = input_tmp; @@ -71,6 +77,7 @@ void testIm2col() { output_ocf.mutable_data( {output_height, output_width, 1, filter_size, filter_size}, *place); + // Im2Col paddle::operators::math::Im2ColFunctor< paddle::operators::math::ColFormat::kCFO, Place, float> im2col; @@ -79,7 +86,12 @@ void testIm2col() { im2col_ocf; im2col(*context, input, output_cfo, stride, stride, padding, padding); - im2col_ocf(*context, input, output_ocf, stride, stride, padding, padding); + im2col_ocf(*context, input, output_ocf, /*stride_height*/ stride, + /*stride_width*/ stride, /*up_pad*/ padding, + /*down_pad*/ padding); + + float out_cfo_data[] = {0, 1, 1, 2, 3, 4, 4, 5}; + float out_ocf_data[] = {0, 1, 3, 4, 1, 2, 4, 5}; float* out_cfo_ptr; if (paddle::platform::is_cpu_place(*place)) { @@ -89,14 +101,9 @@ void testIm2col() { *context); out_cfo_ptr = output_tmp.data(); } - EXPECT_EQ(out_cfo_ptr[0], 0); - EXPECT_EQ(out_cfo_ptr[1], 1); - EXPECT_EQ(out_cfo_ptr[2], 1); - EXPECT_EQ(out_cfo_ptr[3], 2); - EXPECT_EQ(out_cfo_ptr[4], 3); - EXPECT_EQ(out_cfo_ptr[5], 4); - EXPECT_EQ(out_cfo_ptr[6], 4); - EXPECT_EQ(out_cfo_ptr[7], 5); + for (int i = 0; i < 6; ++i) { + EXPECT_EQ(out_cfo_ptr[i], out_cfo_data[i]); + } float* out_ocf_ptr; if (paddle::platform::is_cpu_place(*place)) { @@ -106,14 +113,60 @@ void testIm2col() { *context); out_ocf_ptr = output_tmp.data(); } - EXPECT_EQ(out_ocf_ptr[0], 0); - EXPECT_EQ(out_ocf_ptr[1], 1); - EXPECT_EQ(out_ocf_ptr[2], 3); - EXPECT_EQ(out_ocf_ptr[3], 4); - EXPECT_EQ(out_ocf_ptr[4], 1); - EXPECT_EQ(out_ocf_ptr[5], 2); - EXPECT_EQ(out_ocf_ptr[6], 4); - EXPECT_EQ(out_ocf_ptr[7], 5); + for (int i = 0; i < 6; ++i) { + EXPECT_EQ(out_ocf_ptr[i], out_ocf_data[i]); + } + + // Col2Im: kCFO + paddle::operators::math::Col2ImFunctor< + paddle::operators::math::ColFormat::kCFO, Place, float> + col2im; + paddle::operators::math::Col2ImFunctor< + paddle::operators::math::ColFormat::kOCF, Place, float> + col2im_ocf; + float col2im_data[] = {0, 2, 2, 3, 8, 5}; + + memset(input_ptr, 0, 6 * sizeof(float)); + if (paddle::platform::is_cpu_place(*place)) { + input = input_tmp; + } else { + input.CopyFrom(input_tmp, *place, *context); + } + + col2im(*context, input, output_cfo, stride, stride, padding, padding); + + float* in_ptr; + if (paddle::platform::is_cpu_place(*place)) { + in_ptr = input.data(); + } else { + input_tmp.CopyFrom(input, paddle::platform::CPUPlace(), *context); + in_ptr = input_tmp.data(); + } + for (int i = 0; i < 6; ++i) { + EXPECT_EQ(in_ptr[i], col2im_data[i]); + } + + // Col2Im: kOCF + memset(input_ptr, 0, 6 * sizeof(float)); + if (paddle::platform::is_cpu_place(*place)) { + input = input_tmp; + } else { + input.CopyFrom(input_tmp, *place, *context); + } + + col2im_ocf(*context, input, output_ocf, /*stride_height*/ stride, + /*stride_width*/ stride, /*up_pad*/ padding, + /*down_pad*/ padding); + + if (paddle::platform::is_cpu_place(*place)) { + in_ptr = input.data(); + } else { + input_tmp.CopyFrom(input, paddle::platform::CPUPlace(), *context); + in_ptr = input_tmp.data(); + } + for (int i = 0; i < 6; ++i) { + EXPECT_EQ(in_ptr[i], col2im_data[i]); + } } TEST(math, im2col) { -- GitLab