Commit 1a615b48 authored by hedaoyuan

GPU test of im2col.

Parent 3f555001
@@ -83,8 +83,9 @@ class Im2ColFunctor<kCFO, platform::GPUPlace, T> {
     int block_y = (blocks + 512 - 1) / 512;
     dim3 threads(1024, 1);
     dim3 grid(block_x, block_y);
-    // TODO(hedaoyuan): launch kernel on specified stream
-    im2col<T><<<grid, threads>>>(
+    im2col<T><<<
+        grid, threads, 0,
+        reinterpret_cast<platform::CUDADeviceContext*>(context)->stream()>>>(
         im.data<T>(), num_outputs, input_height, input_width, filter_height,
         filter_width, stride_height, stride_width, padding_height,
         padding_width, output_height, output_width, col.data<T>());
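Every hunk in this file makes the same change: the TODO about launching on a specified stream is resolved by passing the stream owned by the device context as the fourth <<<...>>> launch argument, after grid, block, and dynamic shared-memory size. Below is a minimal standalone sketch of that launch pattern; scaleKernel and launchOnStream are illustrative names, not code from this commit.

#include <cuda_runtime.h>

// Illustrative kernel standing in for im2col/col2im.
__global__ void scaleKernel(float* data, int n) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) data[i] *= 2.0f;
}

// Launch configuration: grid, block, dynamic shared memory (0 bytes), and the
// caller-supplied stream. This mirrors the shape of the launches above, where
// the stream is obtained from
// reinterpret_cast<platform::CUDADeviceContext*>(context)->stream().
void launchOnStream(float* d_data, int n, cudaStream_t stream) {
  dim3 threads(256);
  dim3 grid((n + threads.x - 1) / threads.x);
  scaleKernel<<<grid, threads, 0, stream>>>(d_data, n);
}

Launching on the context's stream keeps these kernels ordered with other work queued on the same device context, rather than serializing everything through the default stream.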
@@ -171,8 +172,9 @@ class Col2ImFunctor<kCFO, platform::GPUPlace, T> {
     // To avoid involving atomic operations, we will launch one kernel per
     // bottom dimension, and then in the kernel add up the top dimensions.
-    // TODO(hedaoyuan): launch kernel on specified stream
-    col2im<T><<<grid, threads>>>(
+    col2im<T><<<
+        grid, threads, 0,
+        reinterpret_cast<platform::CUDADeviceContext*>(context)->stream()>>>(
         num_kernels, col.data<T>(), input_height + 2 * padding_height,
         input_width + 2 * padding_width, input_channels, filter_height,
         filter_width, stride_height, stride_width, padding_height,
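The comment retained in this hunk states the design: instead of scattering column values back to image pixels with atomic adds, one worker is launched per image (bottom) element, and that worker sums every column (top) entry that maps onto it. The sketch below is a simplified host-side version of that gather, assuming a single channel, no padding, and a [KH, KW, OH, OW] column layout (the kCFO layout used by the test below); col2imGather is a hypothetical name, not the kernel in this commit.

// Gather-style col2im: one writer per image pixel, so no atomics are needed.
// Single channel, no padding; column layout assumed to be [KH, KW, OH, OW].
void col2imGather(const float* col, float* im, int H, int W, int KH, int KW,
                  int stride) {
  const int OH = (H - KH) / stride + 1;
  const int OW = (W - KW) / stride + 1;
  for (int h = 0; h < H; ++h) {
    for (int w = 0; w < W; ++w) {
      float sum = 0.0f;
      for (int kh = 0; kh < KH; ++kh) {
        for (int kw = 0; kw < KW; ++kw) {
          int oh = h - kh;
          int ow = w - kw;
          // Pixel (h, w) receives a contribution only if some output position
          // (oh / stride, ow / stride) covers it with filter offset (kh, kw).
          if (oh >= 0 && ow >= 0 && oh % stride == 0 && ow % stride == 0) {
            oh /= stride;
            ow /= stride;
            if (oh < OH && ow < OW) {
              sum += col[((kh * KW + kw) * OH + oh) * OW + ow];
            }
          }
        }
      }
      im[h * W + w] = sum;  // single writer per pixel
    }
  }
}

In the CUDA kernel, the two outer loops become the thread index; because each pixel has exactly one writer, no atomic operations are required.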
@@ -259,8 +261,9 @@ class Im2ColFunctor<kOCF, platform::GPUPlace, T> {
     dim3 threads(block_dim_x, block_dim_y,
                  std::min(block_dim_z, input_channels));
     dim3 grid(output_width, output_height);
-    // TODO(hedaoyuan): launch kernel on specified stream
-    im2colOCF<T><<<grid, threads>>>(
+    im2colOCF<T><<<
+        grid, threads, 0,
+        reinterpret_cast<platform::CUDADeviceContext*>(context)->stream()>>>(
         im.data<T>(), col.data<T>(), input_channels, input_height, input_width,
         filter_height, filter_width, stride_height, stride_width,
         padding_height, padding_width, output_height, output_width);
@@ -340,8 +343,9 @@ class Col2ImFunctor<kOCF, platform::GPUPlace, T> {
     dim3 threads(block_dim_x, block_dim_y,
                  std::min(block_dim_z, input_channels));
     dim3 grid(output_width, output_height);
-    // TODO(hedaoyuan): launch kernel on specified stream
-    col2imOCF<T><<<grid, threads>>>(
+    col2imOCF<T><<<
+        grid, threads, 0,
+        reinterpret_cast<platform::CUDADeviceContext*>(context)->stream()>>>(
         im.data<T>(), col.data<T>(), input_channels, input_height, input_width,
         filter_height, filter_width, stride_height, stride_width,
         padding_height, padding_width, output_height, output_width);
...
@@ -16,19 +16,13 @@ limitations under the License. */
 #include <gtest/gtest.h>
 #include <iostream>
 
-TEST(math, im2col) {
+template <typename Place>
+void testIm2col() {
+  paddle::framework::Tensor input_tmp;
   paddle::framework::Tensor input;
   paddle::framework::Tensor output_cfo;
   paddle::framework::Tensor output_ocf;
-  paddle::framework::Tensor input_check;
-
-  int input_height = 2;
-  int input_width = 3;
-  int filter_size = 2;
-  int stride = 1;
-  int padding = 0;
-  int output_height = (input_height - filter_size + 2 * padding) / stride + 1;
-  int output_width = (input_width - filter_size + 2 * padding) / stride + 1;
+  paddle::framework::Tensor output_tmp;
 
   /**
    * input = [0, 1, 2,
@@ -42,31 +36,54 @@ TEST(math, im2col) {
    * output_ocf = [0, 1, 3, 4
    *               1, 2, 4, 5]
    */
-  auto* cpu_place = new paddle::platform::CPUPlace();
-  float* input_ptr =
-      input.mutable_data<float>({1, input_height, input_width}, *cpu_place);
+  int input_height = 2;
+  int input_width = 3;
+  int filter_size = 2;
+  int stride = 1;
+  int padding = 0;
+  int output_height = (input_height - filter_size + 2 * padding) / stride + 1;
+  int output_width = (input_width - filter_size + 2 * padding) / stride + 1;
+
+  float* input_ptr = input_tmp.mutable_data<float>(
+      {1, input_height, input_width}, paddle::platform::CPUPlace());
   float arr[6] = {0, 1, 2, 3, 4, 5};
   memcpy(input_ptr, arr, 6 * sizeof(float));
 
+  auto* place = new Place();
+  if (paddle::platform::is_cpu_place(*place)) {
+    input = input_tmp;
+  } else {
+    input.CopyFrom<float>(input_tmp, *place);
+  }
   output_cfo.mutable_data<float>(
-      {1, filter_size, filter_size, output_height, output_width}, *cpu_place);
+      {1, filter_size, filter_size, output_height, output_width}, *place);
   output_ocf.mutable_data<float>(
-      {output_height, output_width, 1, filter_size, filter_size}, *cpu_place);
+      {output_height, output_width, 1, filter_size, filter_size}, *place);
 
   paddle::operators::math::Im2ColFunctor<
-      paddle::operators::math::ColFormat::kCFO, paddle::platform::CPUPlace,
-      float>
+      paddle::operators::math::ColFormat::kCFO, Place, float>
       im2col;
   paddle::operators::math::Im2ColFunctor<
-      paddle::operators::math::ColFormat::kOCF, paddle::platform::CPUPlace,
-      float>
+      paddle::operators::math::ColFormat::kOCF, Place, float>
       im2col_ocf;
 
-  paddle::platform::DeviceContext* context =
-      new paddle::platform::CPUDeviceContext(*cpu_place);
+  paddle::platform::DeviceContext* context;
+  if (paddle::platform::is_cpu_place(*place)) {
+    context =
+        new paddle::platform::CPUDeviceContext(paddle::platform::CPUPlace());
+  } else {
+    context =
+        new paddle::platform::CUDADeviceContext(paddle::platform::GPUPlace());
+  }
   im2col(input, output_cfo, stride, stride, padding, padding, context);
   im2col_ocf(input, output_ocf, stride, stride, padding, padding, context);
 
-  float* out_cfo_ptr = output_cfo.data<float>();
+  float* out_cfo_ptr;
+  if (paddle::platform::is_cpu_place(*place)) {
+    out_cfo_ptr = output_cfo.data<float>();
+  } else {
+    output_tmp.CopyFrom<float>(output_cfo, paddle::platform::CPUPlace());
+    out_cfo_ptr = output_tmp.data<float>();
+  }
   EXPECT_EQ(out_cfo_ptr[0], 0);
   EXPECT_EQ(out_cfo_ptr[1], 1);
   EXPECT_EQ(out_cfo_ptr[2], 1);
@@ -76,7 +93,13 @@ TEST(math, im2col) {
   EXPECT_EQ(out_cfo_ptr[6], 4);
   EXPECT_EQ(out_cfo_ptr[7], 5);
 
-  float* out_ocf_ptr = output_ocf.data<float>();
+  float* out_ocf_ptr;
+  if (paddle::platform::is_cpu_place(*place)) {
+    out_ocf_ptr = output_ocf.data<float>();
+  } else {
+    output_tmp.CopyFrom<float>(output_ocf, paddle::platform::CPUPlace());
+    out_ocf_ptr = output_tmp.data<float>();
+  }
   EXPECT_EQ(out_ocf_ptr[0], 0);
   EXPECT_EQ(out_ocf_ptr[1], 1);
   EXPECT_EQ(out_ocf_ptr[2], 3);
@@ -86,3 +109,10 @@ TEST(math, im2col) {
   EXPECT_EQ(out_ocf_ptr[6], 4);
   EXPECT_EQ(out_ocf_ptr[7], 5);
 }
+
+TEST(math, im2col) {
+  testIm2col<paddle::platform::CPUPlace>();
+#ifndef PADDLE_ONLY_CPU
+  testIm2col<paddle::platform::GPUPlace>();
+#endif
+}
\ No newline at end of file
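The hard-coded expectations in the test follow directly from the 2x3 example in its comment block. The tiny standalone program below (plain C++, no Paddle types; all names are illustrative, not part of the commit) recomputes both column layouts for that example and reproduces the checked values.

#include <cstdio>

int main() {
  // 2x3 input, 2x2 filter, stride 1, no padding, as in the test.
  const int H = 2, W = 3, K = 2, OH = 1, OW = 2;
  const float im[H][W] = {{0, 1, 2}, {3, 4, 5}};
  float cfo[K * K * OH * OW];  // kCFO layout: [KH, KW, OH, OW] (one channel)
  float ocf[OH * OW * K * K];  // kOCF layout: [OH, OW, KH, KW] (one channel)
  for (int oh = 0; oh < OH; ++oh)
    for (int ow = 0; ow < OW; ++ow)
      for (int kh = 0; kh < K; ++kh)
        for (int kw = 0; kw < K; ++kw) {
          float v = im[oh + kh][ow + kw];
          cfo[((kh * K + kw) * OH + oh) * OW + ow] = v;
          ocf[((oh * OW + ow) * K + kh) * K + kw] = v;
        }
  // Prints "0 1 1 2 3 4 4 5" and "0 1 3 4 1 2 4 5", matching the EXPECT_EQ
  // checks on out_cfo_ptr and out_ocf_ptr in the test above.
  for (float v : cfo) printf("%g ", v);
  printf("\n");
  for (float v : ocf) printf("%g ", v);
  printf("\n");
  return 0;
}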