diff --git a/paddle/operators/gemm_conv2d_op.h b/paddle/operators/gemm_conv2d_op.h index 08b7df1dfead72fe8de8e89fa633c7bfc7bdbf33..5c9e81732aa72211c2021382cf9a907880c53c17 100644 --- a/paddle/operators/gemm_conv2d_op.h +++ b/paddle/operators/gemm_conv2d_op.h @@ -75,9 +75,6 @@ class GemmConv2DKernel : public framework::OpKernel { framework::DDim output_matrix_shape = {output_channels, output_height * output_width}; - auto* device_context = - const_cast(context.device_context_); - // convolution operator: im2col + gemm int in_step = input_channels / groups; int out_step = output_channels / groups; @@ -87,14 +84,14 @@ class GemmConv2DKernel : public framework::OpKernel { for (int g = 0; g < groups; g++) { // im2col Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step); - im2col(in_slice, col, strides[0], strides[1], paddings[0], paddings[1], - device_context); + im2col(context.device_context(), in_slice, col, strides[0], strides[1], + paddings[0], paddings[1]); // gemm Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step); Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step); - math::matmul(filter_slice, false, col_matrix, false, T(1.0), - &out_slice, T(0.0), device_context); + math::matmul(context.device_context(), filter_slice, false, + col_matrix, false, T(1.0), &out_slice, T(0.0)); } } } @@ -160,9 +157,6 @@ class GemmConvGrad2DKernel : public framework::OpKernel { filter.numel() / filter.dims()[0]}; filter.Resize(filter_matrix_shape); - auto* device_context = - const_cast(context.device_context_); - // convolution backward input operator: gemm + col2im // convolution backward weight operator: im2col + gemm int in_step = input_channels / groups; @@ -184,14 +178,15 @@ class GemmConvGrad2DKernel : public framework::OpKernel { out_grad_batch.Slice(g * out_step, (g + 1) * out_step); Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step); - math::matmul(filter_slice, true, out_grad_slice, false, - T(1.0), &col_matrix, T(0.0), device_context); + math::matmul(context.device_context(), filter_slice, true, + out_grad_slice, false, T(1.0), &col_matrix, + T(0.0)); // col2im Tensor in_grad_slice = in_grad_batch.Slice(g * in_step, (g + 1) * in_step); - col2im(in_grad_slice, col, strides[0], strides[1], paddings[0], - paddings[1], device_context); + col2im(context.device_context(), in_grad_slice, col, strides[0], + strides[1], paddings[0], paddings[1]); } } } @@ -212,15 +207,15 @@ class GemmConvGrad2DKernel : public framework::OpKernel { Tensor out_grad_slice = out_grad_batch.Slice(g * out_step, (g + 1) * out_step); Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step); - im2col(in_slice, col, strides[0], strides[1], paddings[0], - paddings[1], device_context); + im2col(context.device_context(), in_slice, col, strides[0], + strides[1], paddings[0], paddings[1]); // gemm Tensor filter_grad_slice = filter_grad_.Slice(g * out_step, (g + 1) * out_step); - math::matmul(out_grad_slice, false, col_matrix, true, - T(1.0), &filter_grad_slice, T(1.0), - device_context); + math::matmul(context.device_context(), out_grad_slice, + false, col_matrix, true, T(1.0), + &filter_grad_slice, T(1.0)); } } } diff --git a/paddle/operators/math/im2col.cc b/paddle/operators/math/im2col.cc index 5727c1cab16c1379ffe77f5594c057e93a042785..c08a3380f042886cd400df0d840e61856274619c 100644 --- a/paddle/operators/math/im2col.cc +++ b/paddle/operators/math/im2col.cc @@ -27,9 +27,10 @@ template class Im2ColFunctor { public: - void operator()(const framework::Tensor& im, framework::Tensor& col, + void operator()(const platform::DeviceContext& context, + const framework::Tensor& im, framework::Tensor& col, int stride_height, int stride_width, int padding_height, - int padding_width, platform::DeviceContext* context) { + int padding_width) { PADDLE_ENFORCE(im.dims().size() == 3); PADDLE_ENFORCE(col.dims().size() == 5); @@ -79,9 +80,9 @@ template class Col2ImFunctor { public: - void operator()(framework::Tensor& im, const framework::Tensor& col, - int stride_height, int stride_width, int padding_height, - int padding_width, platform::DeviceContext* context) { + void operator()(const platform::DeviceContext& context, framework::Tensor& im, + const framework::Tensor& col, int stride_height, + int stride_width, int padding_height, int padding_width) { PADDLE_ENFORCE(im.dims().size() == 3); PADDLE_ENFORCE(col.dims().size() == 5); int input_channels = im.dims()[0]; @@ -137,9 +138,10 @@ template class Im2ColFunctor { public: - void operator()(const framework::Tensor& im, framework::Tensor& col, + void operator()(const platform::DeviceContext& context, + const framework::Tensor& im, framework::Tensor& col, int stride_height, int stride_width, int padding_height, - int padding_width, platform::DeviceContext* context) { + int padding_width) { PADDLE_ENFORCE(im.dims().size() == 3); PADDLE_ENFORCE(col.dims().size() == 5); int input_channels = im.dims()[0]; @@ -197,9 +199,9 @@ template class Col2ImFunctor { public: - void operator()(framework::Tensor& im, const framework::Tensor& col, - int stride_height, int stride_width, int padding_height, - int padding_width, platform::DeviceContext* context) { + void operator()(const platform::DeviceContext& context, framework::Tensor& im, + const framework::Tensor& col, int stride_height, + int stride_width, int padding_height, int padding_width) { PADDLE_ENFORCE(im.dims().size() == 3); PADDLE_ENFORCE(col.dims().size() == 5); int input_channels = im.dims()[0]; diff --git a/paddle/operators/math/im2col.cu b/paddle/operators/math/im2col.cu index 9bff7bee3c95093852305d392af0949b831e5665..01f60bfe70f844fdcfd5aa481c27d9f12ec51305 100644 --- a/paddle/operators/math/im2col.cu +++ b/paddle/operators/math/im2col.cu @@ -64,9 +64,10 @@ template class Im2ColFunctor { public: - void operator()(const framework::Tensor& im, framework::Tensor& col, + void operator()(const platform::DeviceContext& context, + const framework::Tensor& im, framework::Tensor& col, int stride_height, int stride_width, int padding_height, - int padding_width, platform::DeviceContext* context) { + int padding_width) { PADDLE_ENFORCE(im.dims().size() == 3); PADDLE_ENFORCE(col.dims().size() == 5); @@ -84,9 +85,9 @@ class Im2ColFunctor<<< - grid, threads, 0, - reinterpret_cast(context)->stream()>>>( + im2col<<(context) + .stream()>>>( im.data(), num_outputs, input_height, input_width, filter_height, filter_width, stride_height, stride_width, padding_height, padding_width, output_height, output_width, col.data()); @@ -149,9 +150,9 @@ template class Col2ImFunctor { public: - void operator()(framework::Tensor& im, const framework::Tensor& col, - int stride_height, int stride_width, int padding_height, - int padding_width, platform::DeviceContext* context) { + void operator()(const platform::DeviceContext& context, framework::Tensor& im, + const framework::Tensor& col, int stride_height, + int stride_width, int padding_height, int padding_width) { PADDLE_ENFORCE(im.dims().size() == 3); PADDLE_ENFORCE(col.dims().size() == 5); @@ -174,9 +175,9 @@ class Col2ImFunctor<<< - grid, threads, 0, - reinterpret_cast(context)->stream()>>>( + col2im<<(context) + .stream()>>>( num_kernels, col.data(), input_height + 2 * padding_height, input_width + 2 * padding_width, input_channels, filter_height, filter_width, stride_height, stride_width, padding_height, @@ -235,9 +236,10 @@ template class Im2ColFunctor { public: - void operator()(const framework::Tensor& im, framework::Tensor& col, + void operator()(const platform::DeviceContext& context, + const framework::Tensor& im, framework::Tensor& col, int stride_height, int stride_width, int padding_height, - int padding_width, platform::DeviceContext* context) { + int padding_width) { PADDLE_ENFORCE(im.dims().size() == 3); PADDLE_ENFORCE(col.dims().size() == 5); int input_channels = im.dims()[0]; @@ -268,9 +270,9 @@ class Im2ColFunctor<<< - grid, threads, 0, - reinterpret_cast(context)->stream()>>>( + im2colOCF<<(context) + .stream()>>>( im.data(), col.data(), input_channels, input_height, input_width, filter_height, filter_width, stride_height, stride_width, padding_height, padding_width, output_height, output_width); @@ -318,9 +320,9 @@ template class Col2ImFunctor { public: - void operator()(framework::Tensor& im, const framework::Tensor& col, - int stride_height, int stride_width, int padding_height, - int padding_width, platform::DeviceContext* context) { + void operator()(const platform::DeviceContext& context, framework::Tensor& im, + const framework::Tensor& col, int stride_height, + int stride_width, int padding_height, int padding_width) { PADDLE_ENFORCE(im.dims().size() == 3); PADDLE_ENFORCE(col.dims().size() == 5); int input_channels = im.dims()[0]; @@ -351,9 +353,9 @@ class Col2ImFunctor<<< - grid, threads, 0, - reinterpret_cast(context)->stream()>>>( + col2imOCF<<(context) + .stream()>>>( im.data(), col.data(), input_channels, input_height, input_width, filter_height, filter_width, stride_height, stride_width, padding_height, padding_width, output_height, output_width); diff --git a/paddle/operators/math/im2col.h b/paddle/operators/math/im2col.h index 8958c5457cc2c3034c34ca82fb2e98cc06be63c5..7b717e1603c94cd77c74cb0d86f1d23e2692f9d8 100644 --- a/paddle/operators/math/im2col.h +++ b/paddle/operators/math/im2col.h @@ -72,17 +72,18 @@ enum class ColFormat { kCFO = 0, kOCF = 1 }; template class Im2ColFunctor { public: - void operator()(const framework::Tensor& im, framework::Tensor& col, + void operator()(const platform::DeviceContext& context, + const framework::Tensor& im, framework::Tensor& col, int stride_height, int stride_width, int padding_height, - int padding_width, platform::DeviceContext* context); + int padding_width); }; template class Col2ImFunctor { public: - void operator()(framework::Tensor& im, const framework::Tensor& col, - int stride_height, int stride_width, int padding_height, - int padding_width, platform::DeviceContext* context); + void operator()(const platform::DeviceContext& context, framework::Tensor& im, + const framework::Tensor& col, int stride_height, + int stride_width, int padding_height, int padding_width); }; } // namespace math diff --git a/paddle/operators/math/im2col_test.cc b/paddle/operators/math/im2col_test.cc index 4f380388b108dc173d847f027ba5c9db387a87f8..f0b8c885918afe7f80edc465c6d9be7c11ac066f 100644 --- a/paddle/operators/math/im2col_test.cc +++ b/paddle/operators/math/im2col_test.cc @@ -78,8 +78,8 @@ void testIm2col() { PADDLE_THROW("no GPU support"); #endif // PADDLE_ONLY_CPU } - im2col(input, output_cfo, stride, stride, padding, padding, context); - im2col_ocf(input, output_ocf, stride, stride, padding, padding, context); + im2col(*context, input, output_cfo, stride, stride, padding, padding); + im2col_ocf(*context, input, output_ocf, stride, stride, padding, padding); float* out_cfo_ptr; if (paddle::platform::is_cpu_place(*place)) { diff --git a/python/paddle/v2/framework/tests/test_conv2d_op.py b/python/paddle/v2/framework/tests/test_conv2d_op.py index 3142a60a1ae7d1874d02b81a4bb90c1fc50d07b9..118a5fc1cde5f4a908b065d581956e0855d50a52 100644 --- a/python/paddle/v2/framework/tests/test_conv2d_op.py +++ b/python/paddle/v2/framework/tests/test_conv2d_op.py @@ -73,13 +73,22 @@ class TestConv2dOp(OpTest): self.check_output() def test_check_grad(self): - self.check_grad(set(['Input', 'Filter']), 'Output') + self.check_grad( + set(['Input', 'Filter']), 'Output', max_relative_error=0.05) def test_check_grad_no_filter(self): - self.check_grad(['Input'], 'Output', no_grad_set=set(['Filter'])) + self.check_grad( + ['Input'], + 'Output', + max_relative_error=0.05, + no_grad_set=set(['Filter'])) def test_check_grad_no_input(self): - self.check_grad(['Filter'], 'Output', no_grad_set=set(['Input'])) + self.check_grad( + ['Filter'], + 'Output', + max_relative_error=0.05, + no_grad_set=set(['Input'])) def init_groups(self): self.groups = 1