From 27f28e82c93a002a4cc382787316f5fd5d97997f Mon Sep 17 00:00:00 2001
From: Zhanlue Yang
Date: Fri, 15 Apr 2022 15:40:51 +0800
Subject: [PATCH] [DoubleGrad] Enabled test_imperative_star_gan_with_gradient_penalty.py under eager mode (#41730)

* [DoubleGrad] Enabled double grad test cases in eager_mode for test_imperative_double_grad

* Fixed elementwise issue

* Addressed CI failures

* [DoubleGrad] Enabled test_imperative_triple_grad test cases under eager_mode

* [DoubleGrad] Enabled test_autograd_functional_dynamic.py under eager mode

* Enabled more test cases

* [DoubleGrad] Enabled test_imperative_star_gan_with_gradient_penalty.py under eager mode

* Adjusted test_imperative_star_gan_with_gradient_penalty.py
---
 .../final_state_generator/codegen_utils.py    |  1 +
 paddle/fluid/eager/tensor_wrapper.h           |  1 -
 paddle/phi/kernels/conv_grad_grad_kernel.h    | 10 ++---
 .../phi/kernels/cpu/conv_grad_grad_kernel.cc  | 10 ++---
 .../kernels/gpudnn/conv_grad_grad_kernel.cu   | 30 ++++++-------
 .../kernels/impl/conv_grad_grad_kernel_impl.h | 10 ++---
 paddle/phi/ops/compat/conv2d_sig.cc           |  4 +-
 ...perative_star_gan_with_gradient_penalty.py | 43 ++++++++++++++++++-
 python/paddle/nn/functional/activation.py    |  5 ++-
 python/paddle/utils/code_gen/backward.yaml   | 26 ++++++++++-
 10 files changed, 103 insertions(+), 37 deletions(-)

diff --git a/paddle/fluid/eager/auto_code_generator/final_state_generator/codegen_utils.py b/paddle/fluid/eager/auto_code_generator/final_state_generator/codegen_utils.py
index 96af7dfc4f..ab8c28c33e 100644
--- a/paddle/fluid/eager/auto_code_generator/final_state_generator/codegen_utils.py
+++ b/paddle/fluid/eager/auto_code_generator/final_state_generator/codegen_utils.py
@@ -32,6 +32,7 @@ ops_to_fill_zero_for_empty_grads = set([
     "add_triple_grad",
     "multiply_double_grad",
     "multiply_triple_grad",
+    "conv2d_grad_grad",
 ])
 
 # For API dispatch used at python-level
diff --git a/paddle/fluid/eager/tensor_wrapper.h b/paddle/fluid/eager/tensor_wrapper.h
index e42e04a266..405105771b 100644
--- a/paddle/fluid/eager/tensor_wrapper.h
+++ b/paddle/fluid/eager/tensor_wrapper.h
@@ -118,7 +118,6 @@ class TensorWrapper {
         p_ab_autograd_meta->SetGradNode(new_grad_node);
       }
       recovered_tensor.set_autograd_meta(p_ab_autograd_meta);
-      return recovered_tensor;
     }
   }
 
diff --git a/paddle/phi/kernels/conv_grad_grad_kernel.h b/paddle/phi/kernels/conv_grad_grad_kernel.h
index 339f1c00ea..0a359d778a 100644
--- a/paddle/phi/kernels/conv_grad_grad_kernel.h
+++ b/paddle/phi/kernels/conv_grad_grad_kernel.h
@@ -20,11 +20,11 @@ namespace phi {
 
 template <typename T, typename Context>
 void ConvGradGradKernel(const Context& dev_ctx,
-                        paddle::optional<const DenseTensor&> input_grad_grad,
-                        paddle::optional<const DenseTensor&> filter_grad_grad,
-                        const DenseTensor& out_grad,
                         const DenseTensor& input,
                         const DenseTensor& filter,
+                        const DenseTensor& out_grad,
+                        paddle::optional<const DenseTensor&> input_grad_grad,
+                        paddle::optional<const DenseTensor&> filter_grad_grad,
                         const std::vector<int>& strides,
                         const std::vector<int>& paddings,
                         const std::string& paddding_algorithm,
@@ -34,9 +34,9 @@ void ConvGradGradKernel(const Context& dev_ctx,
                         bool use_addto,
                         int workspace_size_MB,
                         bool exhaustive_search,
-                        DenseTensor* out_grad_grad,
                         DenseTensor* input_grad,
-                        DenseTensor* filter_grad);
+                        DenseTensor* filter_grad,
+                        DenseTensor* out_grad_grad);
 
 template <typename T, typename Context>
 void Conv3DGradGradKernel(const Context& dev_ctx,
diff --git a/paddle/phi/kernels/cpu/conv_grad_grad_kernel.cc b/paddle/phi/kernels/cpu/conv_grad_grad_kernel.cc
index f157bb017f..4966c998dd 100644
--- a/paddle/phi/kernels/cpu/conv_grad_grad_kernel.cc
+++ b/paddle/phi/kernels/cpu/conv_grad_grad_kernel.cc
@@ -39,11 +39,11 @@ void Conv3DGradGradKernel(const Context& ctx,
                           DenseTensor* input_grad,
                           DenseTensor* filter_grad) {
   ConvGradGradKernel<T>(ctx,
-                        input_grad_grad,
-                        filter_grad_grad,
-                        out_grad,
                         input,
                         filter,
+                        out_grad,
+                        input_grad_grad,
+                        filter_grad_grad,
                         strides,
                         paddings_t,
                         padding_algorithm,
@@ -53,9 +53,9 @@ void Conv3DGradGradKernel(const Context& ctx,
                         use_addto,
                         workspace_size_MB,
                         exhaustive_search_t,
-                        out_grad_grad,
                         input_grad,
-                        filter_grad);
+                        filter_grad,
+                        out_grad_grad);
 }
 
 }  // namespace phi
diff --git a/paddle/phi/kernels/gpudnn/conv_grad_grad_kernel.cu b/paddle/phi/kernels/gpudnn/conv_grad_grad_kernel.cu
index 74525e63f4..58c7ea6986 100644
--- a/paddle/phi/kernels/gpudnn/conv_grad_grad_kernel.cu
+++ b/paddle/phi/kernels/gpudnn/conv_grad_grad_kernel.cu
@@ -44,11 +44,11 @@ namespace phi {
 template <typename T, typename Context>
 void ConvCudnnGradGradKernel(
     const Context& ctx,
-    paddle::optional<const DenseTensor&> input_grad_grad,
-    paddle::optional<const DenseTensor&> filter_grad_grad,
-    const DenseTensor& out_grad,
     const DenseTensor& input,
     const DenseTensor& filter,
+    const DenseTensor& out_grad,
+    paddle::optional<const DenseTensor&> input_grad_grad,
+    paddle::optional<const DenseTensor&> filter_grad_grad,
     const std::vector<int>& strides,
     const std::vector<int>& paddings_t,
     const std::string& padding_algorithm,
@@ -58,9 +58,9 @@ void ConvCudnnGradGradKernel(
     bool use_addto,
     int workspace_size_MB,
     bool exhaustive_search_t,
-    DenseTensor* out_grad_grad,
     DenseTensor* input_grad,
-    DenseTensor* filter_grad) {
+    DenseTensor* filter_grad,
+    DenseTensor* out_grad_grad) {
   auto X = &input;
   auto W = &filter;
   auto dO = &out_grad;
@@ -689,11 +689,11 @@ void DepthwiseConvCudnnGradGradKernel(
     DenseTensor* input_grad,
     DenseTensor* filter_grad) {
   ConvCudnnGradGradKernel<T>(ctx,
-                             input_grad_grad,
-                             filter_grad_grad,
-                             out_grad,
                              input,
                              filter,
+                             out_grad,
+                             input_grad_grad,
+                             filter_grad_grad,
                              strides,
                              paddings_t,
                              padding_algorithm,
@@ -703,9 +703,9 @@ void DepthwiseConvCudnnGradGradKernel(
                              use_addto,
                              workspace_size_MB,
                              exhaustive_search_t,
-                             out_grad_grad,
                              input_grad,
-                             filter_grad);
+                             filter_grad,
+                             out_grad_grad);
 }
 
 template <typename T, typename Context>
@@ -729,11 +729,11 @@ void Conv3DCudnnGradGradKernel(
     DenseTensor* input_grad,
     DenseTensor* filter_grad) {
   ConvCudnnGradGradKernel<T>(ctx,
-                             input_grad_grad,
-                             filter_grad_grad,
-                             out_grad,
                              input,
                              filter,
+                             out_grad,
+                             input_grad_grad,
+                             filter_grad_grad,
                              strides,
                              paddings_t,
                              padding_algorithm,
@@ -743,9 +743,9 @@ void Conv3DCudnnGradGradKernel(
                              use_addto,
                              workspace_size_MB,
                              exhaustive_search_t,
-                             out_grad_grad,
                              input_grad,
-                             filter_grad);
+                             filter_grad,
+                             out_grad_grad);
 }
 
 }  // namespace phi
diff --git a/paddle/phi/kernels/impl/conv_grad_grad_kernel_impl.h b/paddle/phi/kernels/impl/conv_grad_grad_kernel_impl.h
index bc0ed44e17..64306bc827 100644
--- a/paddle/phi/kernels/impl/conv_grad_grad_kernel_impl.h
+++ b/paddle/phi/kernels/impl/conv_grad_grad_kernel_impl.h
@@ -26,11 +26,11 @@ namespace phi {
 
 template <typename T, typename Context>
 void ConvGradGradKernel(const Context& dev_ctx,
-                        paddle::optional<const DenseTensor&> input_grad_grad,
-                        paddle::optional<const DenseTensor&> filter_grad_grad,
-                        const DenseTensor& out_grad,
                         const DenseTensor& input,
                         const DenseTensor& filter,
+                        const DenseTensor& out_grad,
+                        paddle::optional<const DenseTensor&> input_grad_grad,
+                        paddle::optional<const DenseTensor&> filter_grad_grad,
                         const std::vector<int>& strides_t,
                         const std::vector<int>& paddings_t,
                         const std::string& padding_algorithm,
@@ -40,9 +40,9 @@ void ConvGradGradKernel(const Context& dev_ctx,
                         bool use_addto,
                         int workspace_size_MB,
                         bool exhaustive_search,
-                        DenseTensor* out_grad_grad,
                         DenseTensor* input_grad,
-                        DenseTensor* filter_grad) {
+                        DenseTensor* filter_grad,
+                        DenseTensor* out_grad_grad) {
   const DenseTensor* X = &input;
   const DenseTensor* dY = &out_grad;
   const DenseTensor* ddX = input_grad_grad.get_ptr();
diff --git a/paddle/phi/ops/compat/conv2d_sig.cc b/paddle/phi/ops/compat/conv2d_sig.cc
index 19e20fddcb..7cc0d6ad17 100644
--- a/paddle/phi/ops/compat/conv2d_sig.cc
+++ b/paddle/phi/ops/compat/conv2d_sig.cc
@@ -62,7 +62,7 @@ KernelSignature Conv2dGradOpArgumentMapping(const ArgumentMappingContext& ctx) {
 KernelSignature Conv2dDoubleGradOpArgumentMapping(
     const ArgumentMappingContext& ctx) {
   return KernelSignature("conv2d_grad_grad",
-                         {"DDInput", "DDFilter", "DOutput", "Input", "Filter"},
+                         {"Input", "Filter", "DOutput", "DDInput", "DDFilter"},
                          {"strides",
                           "paddings",
                           "padding_algorithm",
@@ -72,7 +72,7 @@ KernelSignature Conv2dDoubleGradOpArgumentMapping(
                           "use_addto",
                           "workspace_size_MB",
                           "exhaustive_search"},
-                         {"DDOutput", "DInput", "DFilter"});
+                         {"DInput", "DFilter", "DDOutput"});
 }
 
 }  // namespace phi
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_star_gan_with_gradient_penalty.py b/python/paddle/fluid/tests/unittests/test_imperative_star_gan_with_gradient_penalty.py
index be81c15677..4e542fb13c 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_star_gan_with_gradient_penalty.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_star_gan_with_gradient_penalty.py
@@ -590,6 +590,46 @@ class TestStarGANWithGradientPenalty(unittest.TestCase):
         if fluid.is_compiled_with_cuda():
             self.place_test(fluid.CUDAPlace(0))
 
+    def place_test(self, place):
+        cfg = Config(place, False)
+
+        dataset = create_mnist_dataset(cfg)
+        dataset = paddle.reader.cache(dataset)
+
+        fluid_dygraph_loss = []
+        with fluid.dygraph.guard(cfg.place):
+            fluid_dygraph_model = DyGraphTrainModel(cfg)
+            for batch_id, (image_real, label_org,
+                           label_trg) in enumerate(dataset()):
+                loss = fluid_dygraph_model.run(image_real, label_org, label_trg)
+                fluid_dygraph_loss.append(loss)
+
+        eager_dygraph_loss = []
+        with _test_eager_guard():
+            with fluid.dygraph.guard(cfg.place):
+                eager_dygraph_model = DyGraphTrainModel(cfg)
+                for batch_id, (image_real, label_org,
+                               label_trg) in enumerate(dataset()):
+                    loss = eager_dygraph_model.run(image_real, label_org,
+                                                   label_trg)
+                    eager_dygraph_loss.append(loss)
+
+        for (g_loss_f, d_loss_f), (g_loss_e, d_loss_e) in zip(
+                fluid_dygraph_loss, eager_dygraph_loss):
+            self.assertEqual(g_loss_f, g_loss_e)
+            self.assertEqual(d_loss_f, d_loss_e)
+
+    def test_all_cases(self):
+        self.func_main()
+
+
+class TestStarGANWithGradientPenaltyLegacy(unittest.TestCase):
+    def func_main(self):
+        self.place_test(fluid.CPUPlace())
+
+        if fluid.is_compiled_with_cuda():
+            self.place_test(fluid.CUDAPlace(0))
+
     def place_test(self, place):
         cfg = Config(place)
 
@@ -617,8 +657,7 @@ class TestStarGANWithGradientPenalty(unittest.TestCase):
             self.assertEqual(d_loss_s, d_loss_d)
 
     def test_all_cases(self):
-        if _in_legacy_dygraph():
-            self.func_main()
+        self.func_main()
 
 
 if __name__ == '__main__':
diff --git a/python/paddle/nn/functional/activation.py b/python/paddle/nn/functional/activation.py
index a0efdaac8f..34acbfbf75 100644
--- a/python/paddle/nn/functional/activation.py
+++ b/python/paddle/nn/functional/activation.py
@@ -434,7 +434,10 @@ def leaky_relu(x, negative_slope=0.01, name=None):
             out = F.leaky_relu(x) # [-0.02, 0., 1.]
""" - if in_dynamic_mode(): + if in_dygraph_mode(): + return _C_ops.final_state_leaky_relu(x, negative_slope) + + if _in_legacy_dygraph(): return _C_ops.leaky_relu(x, 'alpha', negative_slope) check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], diff --git a/python/paddle/utils/code_gen/backward.yaml b/python/paddle/utils/code_gen/backward.yaml index a7b29b9f5a..64acc140c2 100644 --- a/python/paddle/utils/code_gen/backward.yaml +++ b/python/paddle/utils/code_gen/backward.yaml @@ -261,6 +261,19 @@ args : (Tensor input, Tensor filter, Tensor out_grad, int[] strides, int[] paddings, str paddding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search) output : Tensor(input_grad), Tensor(filter_grad) invoke : conv2d_grad_impl(input, filter, out_grad, strides, paddings, paddding_algorithm, groups, dilations, data_format, use_addto, workspace_size_MB, exhaustive_search) + backward : conv2d_grad_grad + +- backward_api : conv2d_grad_grad + forward : conv2d_grad (Tensor input, Tensor filter, Tensor grad_out, int[] strides, int[] paddings, str paddding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search) -> Tensor(grad_input), Tensor(grad_filter) + args : (Tensor input, Tensor filter, Tensor grad_out, Tensor grad_input_grad, Tensor grad_filter_grad, int[] strides, int[] paddings, str paddding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search) + output : Tensor(input_grad), Tensor(filter_grad), Tensor(grad_out_grad) + infer_meta : + func : GeneralTernaryGradInferMeta + param: [input, filter, grad_out] + kernel : + func : conv2d_grad_grad + use_cudnn : true + optional : grad_input_grad, grad_filter_grad - backward_api : conv2d_transpose_grad forward : conv2d_transpose(Tensor x, Tensor filter, int[] strides, int[] paddings, int[] output_padding, int[] output_size, str padding_algorithm, int groups, int[] dilations, str data_format) -> Tensor(out) @@ -366,7 +379,7 @@ func : UnchangedInferMeta param : [x] kernel : - func : determinant_grad + func : determinant_grad - backward_api : diagonal_grad forward : diagonal (Tensor x, int offset, int axis1, int axis2) -> Tensor(out) @@ -755,6 +768,16 @@ data_type : out_grad optional : scale, bias +- backward_api : leaky_relu_double_grad + forward : leaky_relu_grad (Tensor x, Tensor grad_out, float alpha) -> Tensor(grad_x) + args : (Tensor x, Tensor grad_x_grad, float alpha) + output : Tensor(grad_out_grad) + infer_meta : + func : UnchangedInferMeta + param : [grad_x_grad] + kernel : + func : leaky_relu_double_grad + - backward_api : leaky_relu_grad forward : leaky_relu (Tensor x, float alpha) -> Tensor(out) args : (Tensor x, Tensor out_grad, float alpha) @@ -764,6 +787,7 @@ param : [x] kernel : func : leaky_relu_grad + backward : leaky_relu_double_grad - backward_api : lerp_grad forward : lerp (Tensor x, Tensor y, Tensor weight) -> Tensor(out) -- GitLab