From 89530384008b023dc1e8c51e5a8e7e710718efff Mon Sep 17 00:00:00 2001
From: Leo Chen
Date: Wed, 28 Oct 2020 08:50:39 -0500
Subject: [PATCH] Fix transpose in conv cudnn kernel when addto enabled (#28295)

---
 paddle/fluid/operators/conv_cudnn_op.cu      | 14 ++++++-
 .../unittests/test_inplace_addto_strategy.py | 39 +++++++++++--------
 2 files changed, 35 insertions(+), 18 deletions(-)

diff --git a/paddle/fluid/operators/conv_cudnn_op.cu b/paddle/fluid/operators/conv_cudnn_op.cu
index f8b76f387cc..3f03df04ea3 100644
--- a/paddle/fluid/operators/conv_cudnn_op.cu
+++ b/paddle/fluid/operators/conv_cudnn_op.cu
@@ -293,8 +293,12 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
 
     // ------------------- cudnn conv forward ---------------------
     ScalingParamType<T> alpha = 1.0f;
-    ScalingParamType<T> beta = ctx.Attr<bool>("use_addto") ? 1.0f : 0.0f;
-    VLOG(4) << "Conv: use_addto = " << ctx.Attr<bool>("use_addto");
+    ScalingParamType<T> beta = 0.0f;
+
+    // NOTE(zhiqiu): inplace addto is not supported in double grad yet.
+    // ScalingParamType<T> beta = ctx.Attr<bool>("use_addto") ? 1.0f : 0.0f;
+    // VLOG(4) << "Conv: use_addto = " << ctx.Attr<bool>("use_addto");
+
     for (int i = 0; i < groups; i++) {
       workspace_handle.RunFunc(
           [&](void* workspace_ptr) {
@@ -387,6 +391,12 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
       if (input_grad) {
         ResizeToChannelFirst<platform::CUDADeviceContext, T>(
             ctx, input_grad, &transformed_input_grad_channel);
+        // NOTE(zhiqiu): If inplace_addto strategy is enabled, we need to copy
+        // the data of input_grad to transformed_input_grad_channel.
+        if (ctx.Attr<bool>("use_addto")) {
+          TransToChannelFirst<platform::CUDADeviceContext, T>(
+              ctx, input_grad, &transformed_input_grad_channel);
+        }
       }
     } else {
       transformed_input_channel.ShareDataWith(*input);
diff --git a/python/paddle/fluid/tests/unittests/test_inplace_addto_strategy.py b/python/paddle/fluid/tests/unittests/test_inplace_addto_strategy.py
index 0c43d569345..b9089448d53 100644
--- a/python/paddle/fluid/tests/unittests/test_inplace_addto_strategy.py
+++ b/python/paddle/fluid/tests/unittests/test_inplace_addto_strategy.py
@@ -30,22 +30,21 @@ class ConvBNLayer(fluid.Layer):
                  filter_size,
                  stride=1,
                  groups=1,
-                 act=None,
-                 use_cudnn=False):
+                 data_format="NCHW"):
         super(ConvBNLayer, self).__init__()
 
-        self._conv = fluid.dygraph.Conv2D(
-            num_channels=num_channels,
-            num_filters=num_filters,
-            filter_size=filter_size,
+        self._conv = paddle.nn.Conv2D(
+            in_channels=num_channels,
+            out_channels=num_filters,
+            kernel_size=filter_size,
             stride=stride,
             padding=(filter_size - 1) // 2,
             groups=groups,
-            act=None,
             bias_attr=False,
-            use_cudnn=use_cudnn)
+            data_format=data_format)
 
-        self._batch_norm = fluid.dygraph.BatchNorm(num_filters, act=act)
+        self._batch_norm = paddle.nn.BatchNorm(
+            num_filters, data_layout=data_format)
 
     def forward(self, inputs):
         y = self._conv(inputs)
@@ -53,19 +52,20 @@ class ConvBNLayer(fluid.Layer):
         return y
 
 
-def create_program():
+def create_program(data_format="NCHW"):
     main = fluid.Program()
     startup = fluid.Program()
     with fluid.program_guard(main, startup):
         x = fluid.data(name='img', shape=[-1, 3, 224, 224])
         x.stop_gradient = False
+        if data_format == "NHWC":
+            x = paddle.transpose(x, [0, 2, 3, 1])
         x = fluid.layers.prelu(x, mode="channel")
         conv = ConvBNLayer(
             num_channels=3,
             num_filters=3,
             filter_size=1,
-            act='relu',
-            use_cudnn=True)
+            data_format=data_format)
         y = conv(x) + x
         loss = fluid.layers.reduce_sum(y)
@@ -77,7 +77,7 @@ class TestInplaceAddto(unittest.TestCase):
 
 
 class TestInplaceAddto(unittest.TestCase):
-    def test_result(self):
+    def check_result(self, data_format="NCHW"):
         def run_program(enable_addto):
             np.random.seed(10)
             paddle.seed(10)
@@ -85,7 +85,7 @@ class TestInplaceAddto(unittest.TestCase):
             if fluid.core.is_compiled_with_cuda():
                 fluid.set_flags({"FLAGS_cudnn_deterministic": True})
             fluid.set_flags({"FLAGS_max_inplace_grad_add": 2})
-            loss, main, startup, w = create_program()
+            loss, main, startup, w = create_program(data_format=data_format)
             place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
             ) else fluid.CPUPlace()
             exe = fluid.Executor(place)
@@ -98,7 +98,7 @@ class TestInplaceAddto(unittest.TestCase):
             exe.run(startup)
             img = np.random.uniform(-128, 128,
                                     [8, 3, 224, 224]).astype(np.float32)
-            for i in range(2):
+            for i in range(10):
                 res = exe.run(compiled,
                               feed={'img': img},
                               fetch_list=[loss.name, w.name])
@@ -106,9 +106,16 @@ class TestInplaceAddto(unittest.TestCase):
 
         res1, w1 = run_program(True)
         res2, w2 = run_program(False)
-        print(res1, res2)
+
         self.assertTrue(np.array_equal(res1, res2))
 
+    def test_nchw(self):
+        self.check_result()
+
+    def test_nhwc(self):
+        self.check_result("NHWC")
+
 
 if __name__ == "__main__":
+    paddle.enable_static()
     unittest.main()
-- 
GitLab
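
Usage sketch: the body of run_program(enable_addto) is only partially shown in
the hunks above. Below is a minimal, self-contained sketch of how the inplace
addto strategy exercised by this test is typically driven in static-graph
Paddle. The build_strategy.enable_addto / CompiledProgram wiring is an
assumption based on the surrounding test code, not lines taken from this diff.

    import numpy as np
    import paddle
    import paddle.fluid as fluid

    paddle.enable_static()

    # Same flag the test sets: permit in-place gradient accumulation.
    fluid.set_flags({"FLAGS_max_inplace_grad_add": 2})

    main, startup = fluid.Program(), fluid.Program()
    with fluid.program_guard(main, startup):
        x = fluid.data(name='img', shape=[-1, 3, 224, 224])
        x.stop_gradient = False
        conv = paddle.nn.Conv2D(3, 3, 1, bias_attr=False)
        y = conv(x) + x  # residual add, like the test network
        loss = fluid.layers.reduce_sum(y)
        fluid.optimizer.SGD(learning_rate=0.001).minimize(loss)

    # Assumed switch for the inplace addto pass, mirroring the
    # enable_addto argument of run_program in the test above.
    build_strategy = fluid.BuildStrategy()
    build_strategy.enable_addto = True
    compiled = fluid.CompiledProgram(main).with_data_parallel(
        loss_name=loss.name, build_strategy=build_strategy)

    place = (fluid.CUDAPlace(0)
             if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace())
    exe = fluid.Executor(place)
    exe.run(startup)

    img = np.random.uniform(-128, 128,
                            [8, 3, 224, 224]).astype(np.float32)
    loss_v, = exe.run(compiled, feed={'img': img}, fetch_list=[loss.name])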