From 95614811614d4b3ed841f7f06553efceefc20cca Mon Sep 17 00:00:00 2001 From: xiaoguoguo626807 <100397923+xiaoguoguo626807@users.noreply.github.com> Date: Fri, 24 Mar 2023 17:24:48 +0800 Subject: [PATCH] =?UTF-8?q?Revert=20"=E3=80=90prim=E3=80=91delete=20high?= =?UTF-8?q?=20order=20prim=20flag=20&&=20add=20special=20prune=20rules=20f?= =?UTF-8?q?or=20node.cc=20(#51676)"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 978d544ba8674afd67cecd4f2d237a15071cfdd3. --- .../generator/eager_gen.py | 58 +-------- paddle/fluid/eager/grad_node_info.cc | 63 --------- paddle/fluid/eager/grad_node_info.h | 3 - paddle/phi/api/yaml/legacy_backward.yaml | 14 +- .../unittests/test_imperative_double_grad.py | 119 ++++++----------- .../unittests/test_imperative_triple_grad.py | 123 ++++++++++++++---- 6 files changed, 163 insertions(+), 217 deletions(-) diff --git a/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py b/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py index f9ff978ad4f..d800dc92988 100644 --- a/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py +++ b/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py @@ -60,19 +60,6 @@ black_ops_list = [ ] -# white ops list whose kernel can be deleted after performance analysis -# original kernel and its derivative kernel can be deleted when composite_grad -# kernel performs same to it. -prim_white_list = ["matmul_double_grad"] - -# dict of special api that forward api's output will affect bacward api's output -# bacward api's output usually affected by backward api's input -special_prune_dict = { - "matmul_grad": {"x": "grad_y", "y": "grad_x"}, - "multiply_grad": {"x": "grad_y", "y": "grad_x"}, -} - - ######### # Utils # ######### @@ -990,25 +977,12 @@ class DygraphFunctionGeneratorBase(FunctionGeneratorBase): grad_node_out_list.append(name) is_optional = name in self.optional_inputs - is_special_forward_api = ( - True if forward_api_name in special_prune_dict else False - ) - if is_optional: set_grad_out_meta = f"{indent}if({name}.get_ptr() != nullptr) grad_node->SetGradOutMeta(*({name}.get_ptr()), {pos});" else: - if ( - is_special_forward_api - and name in special_prune_dict[forward_api_name] - ): - meta_name = GetAutoGradMetaName( - special_prune_dict[forward_api_name][name] - ) - set_grad_out_meta = f"{indent}grad_node->SetGradOutMeta({name}, {meta_name}, {pos});" - else: - set_grad_out_meta = ( - f"{indent}grad_node->SetGradOutMeta({name}, {pos});" - ) + set_grad_out_meta = ( + f"{indent}grad_node->SetGradOutMeta({name}, {pos});" + ) set_grad_out_meta_list.append(set_grad_out_meta) set_grad_out_meta_str = "\n".join(set_grad_out_meta_list) @@ -2281,33 +2255,13 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase): """ # TODO(Ruting):using composite only when we don't have backward kernel in the future. 
elif is_composite_grad_api: - if composite_grad_api_name in prim_white_list: - grad_function_call_str = f""" -{indent}bool original_global_grad = egr::Controller::Instance().HasGrad(); -{indent}if(!create_graph){{ -{indent}{indent}egr::Controller::Instance().SetHasGrad(create_graph); - }} - {indent}{composite_grad_api_namespace}{composite_grad_api_name}{composite_template_name}({composite_grad_api_args_str}); - VLOG(4) << "Composite api {composite_grad_api_name} is called "; -{indent}if(!create_graph){{ -{indent}{indent}egr::Controller::Instance().SetHasGrad(original_global_grad); - }} - """ - else: - grad_function_call_str = f""" + grad_function_call_str = f""" if (paddle::prim::PrimCommonUtils::IsEagerPrimEnabled()) {{ -{indent}bool original_global_grad = egr::Controller::Instance().HasGrad(); -{indent}if(!create_graph){{ -{indent}{indent}egr::Controller::Instance().SetHasGrad(create_graph); - }} {indent}{composite_grad_api_namespace}{composite_grad_api_name}{composite_template_name}({composite_grad_api_args_str}); - {indent}VLOG(4) << "Composite api {composite_grad_api_name} is called "; -{indent}if(!create_graph){{ -{indent}{indent}egr::Controller::Instance().SetHasGrad(original_global_grad); - }} + VLOG(4) << "Composite api {composite_grad_api_name} is called "; }}else{{ {indent}{grad_api_namespace}{backward_api_name}({grad_api_args_str}); - {indent}VLOG(4) << "Fused api {backward_api_name} is called "; + VLOG(4) << "Fused api {backward_api_name} is called "; }} """ else: diff --git a/paddle/fluid/eager/grad_node_info.cc b/paddle/fluid/eager/grad_node_info.cc index 9d1c7619750..eed3d8ed5ca 100644 --- a/paddle/fluid/eager/grad_node_info.cc +++ b/paddle/fluid/eager/grad_node_info.cc @@ -263,69 +263,6 @@ void GradNodeBase::SetGradOutMeta(const paddle::Tensor& fwd_in, } } -/* -special func for matmul_double_grad etc. dx exists when x and y exists, -if stop_gradient of y is true, dy is None who is matmul_grad's out_put, ddy is -None, so dx = ddy * dout should be None. 
-*/ -void GradNodeBase::SetGradOutMeta(const paddle::Tensor& fwd_in, - const AutogradMeta* fwd_out_meta, - size_t slot_rank) { - auto* fwd_in_meta = egr::EagerUtils::nullable_autograd_meta(fwd_in); - PADDLE_ENFORCE_LE( - (slot_rank + 1), - bwd_out_meta_.size(), - paddle::platform::errors::InvalidArgument( - "Slot Rank should less equal than bwd_out_meta_ size, " - "since bwd_out_meta_ is designed to hold as same num as " - "backward outputs.")); - auto& metas = bwd_out_meta_.at(slot_rank); - // Init stop gradient vector before use to avoid push back - if (metas.size() == 0) { - metas.resize(1); - } - auto& meta = metas[0]; - // Set Stop_gradient - if (fwd_in_meta && !fwd_in_meta->StopGradient() && fwd_out_meta) { - meta.SetStopGradient(false); - } else { - meta.SetStopGradient(true); - } - // Set Adj Edges - if (fwd_in_meta && !fwd_in_meta->StopGradient() && fwd_out_meta) { - auto node = fwd_in_meta->GetMutableGradNode(); - if (!node || !node.get()) { - fwd_in_meta->SetGradNode( - std::make_shared(fwd_in_meta)); - } - VLOG(3) << "Add Edges for slot: " << slot_rank << ", the Edge is from " - << this->name() << " (addr: " << this << ") " - << " to " << fwd_in_meta->GetMutableGradNode()->name() - << " (addr: " << fwd_in_meta->GetMutableGradNode().get() << ")"; - - meta.SetEdge(fwd_in_meta->GetMutableGradNode(), fwd_in_meta->OutRankInfo()); - } - // Record TensorMeta - if (fwd_in.impl() && fwd_in.impl().get()) { - if (phi::DenseTensor::classof(fwd_in.impl().get())) { - // Only Copy Meta - phi::DenseTensor* dense_tensor = - static_cast(fwd_in.impl().get()); - PADDLE_ENFORCE_NE( - dense_tensor->meta().dtype, - phi::DataType::UNDEFINED, - paddle::platform::errors::Fatal("Attempting to copy DenseTensorMeta " - "with phi::DataType::UNDEFINED," - "which is illegal.")); - meta.SetTensorMeta(dense_tensor->meta()); - meta.SetPlace(fwd_in.place()); - } - } else { - VLOG(7) << "Unable to initialize the DenseTensorMeta of GradSlotMeta with " - "non-DenseTensor argument."; - } -} - void GradNodeBase::SetGradOutMeta(const std::vector& fwd_in, size_t slot_rank) { size_t slot_size = fwd_in.size(); diff --git a/paddle/fluid/eager/grad_node_info.h b/paddle/fluid/eager/grad_node_info.h index 8d73092ddf9..9c7bfd4b8db 100644 --- a/paddle/fluid/eager/grad_node_info.h +++ b/paddle/fluid/eager/grad_node_info.h @@ -225,9 +225,6 @@ class GradNodeBase { void SetGradOutMeta(const std::vector& fwd_in, size_t slot_rank); void SetGradOutMeta(const paddle::Tensor& fwd_in, size_t slot_rank); - void SetGradOutMeta(const paddle::Tensor& fwd_in, - const AutogradMeta* fwd_in_other, - size_t slot_rank); /** * Default setters for Grad in/out meta this should be used for same special * Node which will not create by user diff --git a/paddle/phi/api/yaml/legacy_backward.yaml b/paddle/phi/api/yaml/legacy_backward.yaml index 6f6da654533..607ee5da9ae 100755 --- a/paddle/phi/api/yaml/legacy_backward.yaml +++ b/paddle/phi/api/yaml/legacy_backward.yaml @@ -681,7 +681,8 @@ param : [x, y, grad_out] kernel : func : matmul_double_grad - composite : matmul_double_grad(x, y, grad_out, grad_x_grad, grad_y_grad, transpose_x=false, transpose_y=false) + composite : matmul_double_grad(x, y, grad_out, grad_x_grad, grad_y_grad, transpose_x, transpose_y, x_grad, y_grad, grad_out_grad) + backward : matmul_triple_grad optional : grad_x_grad, grad_y_grad - backward_op : matmul_grad @@ -695,6 +696,17 @@ func : matmul_grad backward : matmul_double_grad +- backward_op : matmul_triple_grad + forward : matmul_double_grad (Tensor x, Tensor y, Tensor fwd_grad_out, 
Tensor fwd_grad_grad_x, Tensor fwd_grad_grad_y, bool transpose_x=false, bool transpose_y=false) -> Tensor(grad_x), Tensor(grad_y), Tensor(grad_grad_out) + args : (Tensor x, Tensor y, Tensor fwd_grad_out, Tensor fwd_grad_grad_x, Tensor fwd_grad_grad_y, Tensor grad_x_grad, Tensor grad_y_grad, Tensor grad_grad_out_grad, bool transpose_x=false, bool transpose_y=false) + output : Tensor(x_grad), Tensor(y_grad), Tensor(fwd_grad_out_grad), Tensor(fwd_grad_grad_x_grad), Tensor(fwd_grad_grad_y_grad) + infer_meta : + func : GeneralQuinaryGradInferMeta + param : [x, y, fwd_grad_out, fwd_grad_grad_x, fwd_grad_grad_y] + kernel : + func : matmul_triple_grad + optional : fwd_grad_grad_x, fwd_grad_grad_y, grad_x_grad, grad_y_grad, grad_grad_out_grad + - backward_op : max_grad forward: max (Tensor x, IntArray axis={}, bool keepdim=false) -> Tensor(out) args : (Tensor x, Tensor out, Tensor out_grad, IntArray axis={}, bool keepdim=false, bool reduce_all=false) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_double_grad.py b/python/paddle/fluid/tests/unittests/test_imperative_double_grad.py index 128add50bab..0db2bc01150 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_double_grad.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_double_grad.py @@ -706,38 +706,30 @@ class TestDygraphDoubleGradMatmul(TestCase): dout = paddle.to_tensor( np.ones([3, 3]), stop_gradient=False, dtype='float32' ) - (dx, dy) = paddle.grad( - [out], [x, y], [dout], retain_graph=True, create_graph=True + (dx,) = paddle.grad( + [out], [x], [dout], retain_graph=True, create_graph=True ) ddx = paddle.to_tensor( np.ones([3, 3]), stop_gradient=False, dtype='float32' ) - ddy = ddx dx_double_grad, dy_double_grad, ddout = paddle.grad( - [dx, dy], + [dx], [x, y, dout], - [ddx, ddy], + [ddx], retain_graph=True, create_graph=True, ) return dx_double_grad, dy_double_grad, ddout def expected(): - dx_double_grad_expected = np.matmul( - np.ones([3, 3], dtype="float32"), - np.ones([3, 3], dtype="float32"), - ) + dx_double_grad_expected = np.zeros([3, 3], dtype="float32") dy_double_grad_expected = np.matmul( np.ones([3, 3], dtype="float32"), np.ones([3, 3], dtype="float32"), ) - ddout_expected1 = np.matmul( + ddout_expected = np.matmul( np.ones([3, 3], dtype="float32"), input_numpy_y ) - ddout_expected2 = np.matmul( - input_numpy_x, np.ones([3, 3], dtype="float32") - ) - ddout_expected = ddout_expected1 + ddout_expected2 return ( dx_double_grad_expected, dy_double_grad_expected, @@ -781,26 +773,27 @@ class TestDygraphDoubleGradMatmul(TestCase): ddy = paddle.to_tensor( np.ones([3, 3]), stop_gradient=False, dtype='float32' ) - # when x isnot be differentiate in first grad dy in second grad could be None in composite op - dx_double_grad, ddout = paddle.grad( + dx_double_grad, dy_double_grad, ddout = paddle.grad( [dy], - [x, dout], + [x, y, dout], [ddy], retain_graph=True, create_graph=True, ) - return dx_double_grad, ddout + return dx_double_grad, dy_double_grad, ddout def expected(): dx_double_grad_expected = np.matmul( np.ones([3, 3], dtype="float32"), np.ones([3, 3], dtype="float32"), ) + dy_double_grad_expected = np.zeros([3, 3], dtype="float32") ddout_expected = np.matmul( input_numpy_x, np.ones([3, 3], dtype="float32") ) return ( dx_double_grad_expected, + dy_double_grad_expected, ddout_expected, ) @@ -841,23 +834,24 @@ class TestDygraphDoubleGradMatmul(TestCase): ddy = paddle.to_tensor( np.ones([3]), stop_gradient=False, dtype='float32' ) - # when x is not be differentiate in first grad, dy from second 
grad could be None in composite api. - dx_double_grad, ddout = paddle.grad( + dx_double_grad, dy_double_grad, ddout = paddle.grad( [dy], - [x, dout], + [x, y, dout], [ddy], retain_graph=True, create_graph=True, ) - return dx_double_grad, ddout + return dx_double_grad, dy_double_grad, ddout def expected(): dx_double_grad_expected = np.ones([3], dtype="float32") + dy_double_grad_expected = np.zeros([3], dtype="float32") ddout_expected = np.matmul( input_numpy_x, np.ones([3], dtype="float32") ) return ( dx_double_grad_expected, + dy_double_grad_expected, ddout_expected, ) @@ -898,22 +892,23 @@ class TestDygraphDoubleGradMatmul(TestCase): ddx = paddle.to_tensor( np.ones([3]), stop_gradient=False, dtype='float32' ) - # when y is not be differentiate in first grad, dx from second grad could be None in composite api. - dy_double_grad, ddout = paddle.grad( + dx_double_grad, dy_double_grad, ddout = paddle.grad( [dx], - [y, dout], + [x, y, dout], [ddx], retain_graph=True, create_graph=True, ) - return dy_double_grad, ddout + return dx_double_grad, dy_double_grad, ddout def expected(): + dx_double_grad_expected = np.zeros([3], dtype="float32") dy_double_grad_expected = np.ones([3], dtype="float32") ddout_expected = np.matmul( input_numpy_y, np.ones([3], dtype="float32") ) return ( + dx_double_grad_expected, dy_double_grad_expected, ddout_expected, ) @@ -925,7 +920,6 @@ class TestDygraphDoubleGradMatmul(TestCase): for place in places: paddle.device.set_device(place) actual_results = actual() - for expected_result, actual_result in zip( expected_results, actual_results ): @@ -956,22 +950,24 @@ class TestDygraphDoubleGradMatmul(TestCase): ddy = paddle.to_tensor( np.ones([1]), stop_gradient=False, dtype='float32' ) - dx_double_grad, ddout = paddle.grad( + dx_double_grad, dy_double_grad, ddout = paddle.grad( [dy], - [x, dout], + [x, y, dout], [ddy], retain_graph=True, create_graph=True, ) - return dx_double_grad, ddout + return dx_double_grad, dy_double_grad, ddout def expected(): dx_double_grad_expected = np.ones([2, 1], dtype="float32") + dy_double_grad_expected = np.zeros([1], dtype="float32") ddout_expected = np.matmul( input_numpy_x, np.ones([1], dtype="float32") ) return ( dx_double_grad_expected, + dy_double_grad_expected, ddout_expected, ) @@ -1012,19 +1008,21 @@ class TestDygraphDoubleGradMatmul(TestCase): ddx = paddle.to_tensor( np.ones([2, 1]), stop_gradient=False, dtype='float32' ) - dy_double_grad, ddout = paddle.grad( + dx_double_grad, dy_double_grad, ddout = paddle.grad( [dx], - [y, dout], + [x, y, dout], [ddx], retain_graph=True, create_graph=True, ) - return dy_double_grad, ddout + return dx_double_grad, dy_double_grad, ddout def expected(): + dx_double_grad_expected = np.zeros([2, 1], dtype="float32") dy_double_grad_expected = np.ones([1], dtype="float32") * 2 ddout_expected = np.ones([2], dtype="float32") * input_numpy_y[0] return ( + dx_double_grad_expected, dy_double_grad_expected, ddout_expected, ) @@ -1043,8 +1041,6 @@ class TestDygraphDoubleGradMatmul(TestCase): expected_result, actual_result, rtol=1e-6 ) - # TODO(Ruting) test complex dtype when composite api support - ''' # case7: ddx is none, dims = 1, complex dtype def test_matmul_double_grad_case7(self): input_numpy_x = np.random.random([3]).astype( @@ -1073,17 +1069,19 @@ class TestDygraphDoubleGradMatmul(TestCase): ddx = paddle.to_tensor( np.ones([3]), stop_gradient=False, dtype='complex64' ) - # when y is not be differentiate in first grad, dx from second grad could be None in composite api. 
- dy_double_grad, ddout = paddle.grad( + dx_double_grad, dy_double_grad, ddout = paddle.grad( [dx], - [y, dout], + [x, y, dout], [ddx], retain_graph=True, create_graph=True, ) - return dy_double_grad, ddout + return dx_double_grad, dy_double_grad, ddout def expected(): + dx_double_grad_expected = np.zeros( + [3], dtype="float32" + ) + 0j * np.zeros([3], dtype="float32") dy_double_grad_expected = np.ones( [3], dtype="float32" ) + 0j * np.ones([3], dtype="float32") @@ -1091,6 +1089,7 @@ class TestDygraphDoubleGradMatmul(TestCase): input_numpy_y_conj, np.ones([3], dtype="float32") ) return ( + dx_double_grad_expected, dy_double_grad_expected, ddout_expected, ) @@ -1109,7 +1108,6 @@ class TestDygraphDoubleGradMatmul(TestCase): expected_result, actual_result, rtol=1e-6 ) - # case8: ddy is none, dims = 1, complex dtype def test_matmul_double_grad_case8(self): input_numpy_x = np.random.random([3]).astype( @@ -1138,22 +1136,24 @@ class TestDygraphDoubleGradMatmul(TestCase): ddy = paddle.to_tensor( np.ones([3]), stop_gradient=False, dtype='complex64' ) - dx_double_grad, ddout = paddle.grad( + dx_double_grad, dy_double_grad, ddout = paddle.grad( [dy], - [x, dout], + [x, y, dout], [ddy], retain_graph=True, create_graph=True, ) - return dx_double_grad, ddout + return dx_double_grad, dy_double_grad, ddout def expected(): dx_double_grad_expected = np.ones([3], dtype="float32") + dy_double_grad_expected = np.zeros([3], dtype="float32") ddout_expected = np.matmul( input_numpy_x_conj, np.ones([3], dtype="float32") ) return ( dx_double_grad_expected, + dy_double_grad_expected, ddout_expected, ) @@ -1170,39 +1170,6 @@ class TestDygraphDoubleGradMatmul(TestCase): np.testing.assert_allclose( expected_result, actual_result, rtol=1e-6 ) - ''' - - def test_value_error(self): - def test(): - import paddle - import paddle.nn as nn - - model = nn.Sequential(nn.Linear(3, 4)) - - x = paddle.randn([4, 1]) - y = paddle.randn([4, 1]) - z = paddle.randn([4, 1]) - x.stop_gradient = False - y.stop_gradient = False - z.stop_gradient = False - out = model(paddle.concat((x, y, z), axis=1)) - - data = { - "x": x, - "y": y, - "z": z, - "u": out[:, 0:1], - "v": out[:, 1:2], - "w": out[:, 2:3], - "p": out[:, 3:4], - } - - v = out[:, 1:2] - z = paddle.grad(v, x, create_graph=True)[0] - zz = paddle.grad(z, x, create_graph=True)[0] - - with self.assertRaises(ValueError): - test() if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_imperative_triple_grad.py b/python/paddle/fluid/tests/unittests/test_imperative_triple_grad.py index 43b744660fe..63322b3f6d8 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_triple_grad.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_triple_grad.py @@ -89,17 +89,17 @@ class TestDygraphTripleGradMatmul(TestCase): np.testing.assert_array_equal(new_c.numpy(), new_c_ref) x_grad_ref = np.ones([3, 3]) * 0.0 - assert x.grad is None + np.testing.assert_array_equal(x.grad.numpy(), x_grad_ref) y_grad_ref = np.ones([3, 3]) * 0.0 - assert y.grad is None + np.testing.assert_array_equal(y.grad.numpy(), y_grad_ref) new_out_g_ref = np.ones([3, 3]) * 3.0 np.testing.assert_array_equal(new_out_g.grad.numpy(), new_out_g_ref) new_x_g_g_ref = np.ones([3, 3]) * 0.0 new_y_g_g_ref = np.ones([3, 3]) * 3.0 - assert new_x_g_g.grad is None + np.testing.assert_array_equal(new_x_g_g.grad.numpy(), new_x_g_g_ref) np.testing.assert_array_equal(new_y_g_g.grad.numpy(), new_y_g_g_ref) @@ -359,14 +359,13 @@ class TestDygraphTripleGradMatmulcase1(TestCase): retain_graph=True, 
create_graph=True, ) - # d_x, d_y should be none because ddd_out = None - d_dout, d_ddx, d_ddy = paddle.grad( + d_x, d_y, d_dout, d_ddx, d_ddy = paddle.grad( [dx_double_grad, dy_double_grad], - [dout, ddx, ddy], + [x, y, dout, ddx, ddy], retain_graph=False, create_graph=False, ) - return d_dout, d_ddx, d_ddy + return d_x, d_y, d_dout, d_ddx, d_ddy # case1: d_ddout is none, dims != 1 def test_matmul_triple_grad_case1(self): @@ -378,10 +377,14 @@ class TestDygraphTripleGradMatmulcase1(TestCase): self.input_numpy_ddy = np.ones([3, 3], dtype="float32") init_data() + d_x_expected = np.zeros([3, 3], dtype="float32") + d_y_expected = np.zeros([3, 3], dtype="float32") d_dout_expected = np.ones([3, 3], dtype="float32") * 6 d_ddx_expected = np.ones([3, 3], dtype="float32") * 3 d_ddy_expected = np.ones([3, 3], dtype="float32") * 3 expected_results = ( + d_x_expected, + d_y_expected, d_dout_expected, d_ddx_expected, d_ddy_expected, @@ -415,6 +418,18 @@ class TestDygraphTripleGradMatmulcase1(TestCase): self.input_numpy_ddy = np.ones([3], dtype="float32") init_data() + d_x_expected = np.zeros( + [ + 3, + ], + dtype="float32", + ) + d_y_expected = np.zeros( + [ + 3, + ], + dtype="float32", + ) d_dout_expected = np.ones([1], dtype="float32") * 6 d_ddx_expected = np.ones( [ @@ -429,6 +444,8 @@ class TestDygraphTripleGradMatmulcase1(TestCase): dtype="float32", ) expected_results = ( + d_x_expected, + d_y_expected, d_dout_expected, d_ddx_expected, d_ddy_expected, @@ -458,6 +475,8 @@ class TestDygraphTripleGradMatmulcase1(TestCase): self.input_numpy_ddy = np.ones([1], dtype="float32") init_data() + d_x_expected = np.zeros([3, 1], dtype="float32") + d_y_expected = np.zeros([1], dtype="float32") d_dout_expected = ( np.ones( [ @@ -470,6 +489,8 @@ class TestDygraphTripleGradMatmulcase1(TestCase): d_ddx_expected = np.ones([3, 1], dtype="float32") d_ddy_expected = np.ones([1], dtype="float32") * 3 expected_results = ( + d_x_expected, + d_y_expected, d_dout_expected, d_ddx_expected, d_ddy_expected, @@ -486,7 +507,6 @@ class TestDygraphTripleGradMatmulcase1(TestCase): ) -''' # d_ddout is none, dtype is complex64 class TestDygraphTripleGradMatmulcase2(TestCase): def setUp(self): @@ -646,7 +666,6 @@ class TestDygraphTripleGradMatmulcase2(TestCase): np.testing.assert_allclose( expected_result, actual_result, rtol=1e-6 ) -''' # d_ddout is none, d_dx is none, dtype is float32 @@ -689,15 +708,13 @@ class TestDygraphTripleGradMatmulcase3(TestCase): retain_graph=True, create_graph=True, ) - # d_x d_y is None because (double grad out_put ddout grad tensor)d_ddout is None - # d_ddy is None because (double grad out_put dx grad tensor) d_dx and d_ddout is None - d_dout, d_ddx = paddle.grad( + d_x, d_y, d_dout, d_ddx, d_ddy = paddle.grad( [dy_double_grad], - [dout, ddx], + [x, y, dout, ddx, ddy], retain_graph=False, create_graph=False, ) - return d_dout, d_ddx + return d_x, d_y, d_dout, d_ddx, d_ddy # case1: d_ddout is none, d_dx is none, dims != 1 def test_matmul_triple_grad_case1(self): @@ -709,11 +726,17 @@ class TestDygraphTripleGradMatmulcase3(TestCase): self.input_numpy_ddy = np.ones([3, 3], dtype="float32") init_data() + d_x_expected = np.zeros([3, 3], dtype="float32") + d_y_expected = np.zeros([3, 3], dtype="float32") d_dout_expected = np.ones([3, 3], dtype="float32") * 3 d_ddx_expected = np.ones([3, 3], dtype="float32") * 3 + d_ddy_expected = np.zeros([3, 3], dtype="float32") expected_results = ( + d_x_expected, + d_y_expected, d_dout_expected, d_ddx_expected, + d_ddy_expected, ) for place in self.places: @@ -744,6 
+767,18 @@ class TestDygraphTripleGradMatmulcase3(TestCase): self.input_numpy_ddy = np.ones([3], dtype="float32") init_data() + d_x_expected = np.zeros( + [ + 3, + ], + dtype="float32", + ) + d_y_expected = np.zeros( + [ + 3, + ], + dtype="float32", + ) d_dout_expected = np.ones([1], dtype="float32") * 3 d_ddx_expected = np.ones( [ @@ -751,9 +786,18 @@ class TestDygraphTripleGradMatmulcase3(TestCase): ], dtype="float32", ) + d_ddy_expected = np.zeros( + [ + 3, + ], + dtype="float32", + ) expected_results = ( + d_x_expected, + d_y_expected, d_dout_expected, d_ddx_expected, + d_ddy_expected, ) for place in self.places: @@ -780,6 +824,8 @@ class TestDygraphTripleGradMatmulcase3(TestCase): self.input_numpy_ddy = np.ones([1], dtype="float32") init_data() + d_x_expected = np.zeros([3, 1], dtype="float32") + d_y_expected = np.zeros([1], dtype="float32") d_dout_expected = np.ones( [ 3, @@ -787,9 +833,13 @@ class TestDygraphTripleGradMatmulcase3(TestCase): dtype="float32", ) d_ddx_expected = np.ones([3, 1], dtype="float32") + d_ddy_expected = np.zeros([1], dtype="float32") expected_results = ( + d_x_expected, + d_y_expected, d_dout_expected, d_ddx_expected, + d_ddy_expected, ) for place in self.places: @@ -803,7 +853,6 @@ class TestDygraphTripleGradMatmulcase3(TestCase): ) -''' # d_ddout is none, d_dx is none, dtype is complex64 class TestDygraphTripleGradMatmulcase4(TestCase): def setUp(self): @@ -950,7 +999,6 @@ class TestDygraphTripleGradMatmulcase4(TestCase): np.testing.assert_allclose( expected_result, actual_result, rtol=1e-6 ) -''' # d_ddout is none, d_dy is none, dtype is float32 @@ -993,13 +1041,13 @@ class TestDygraphTripleGradMatmulcase5(TestCase): retain_graph=True, create_graph=True, ) - d_dout, d_ddy = paddle.grad( + d_x, d_y, d_dout, d_ddx, d_ddy = paddle.grad( [dx_double_grad], - [dout, ddy], + [x, y, dout, ddx, ddy], retain_graph=False, create_graph=False, ) - return d_dout, d_ddy + return d_x, d_y, d_dout, d_ddx, d_ddy # case1: d_ddout is none, d_dy is none, dims != 1 def test_matmul_triple_grad_case1(self): @@ -1011,10 +1059,16 @@ class TestDygraphTripleGradMatmulcase5(TestCase): self.input_numpy_ddy = np.ones([3, 3], dtype="float32") init_data() + d_x_expected = np.zeros([3, 3], dtype="float32") + d_y_expected = np.zeros([3, 3], dtype="float32") d_dout_expected = np.ones([3, 3], dtype="float32") * 3 + d_ddx_expected = np.zeros([3, 3], dtype="float32") * 3 d_ddy_expected = np.ones([3, 3], dtype="float32") * 3 expected_results = ( + d_x_expected, + d_y_expected, d_dout_expected, + d_ddx_expected, d_ddy_expected, ) @@ -1046,7 +1100,25 @@ class TestDygraphTripleGradMatmulcase5(TestCase): self.input_numpy_ddy = np.ones([3], dtype="float32") init_data() + d_x_expected = np.zeros( + [ + 3, + ], + dtype="float32", + ) + d_y_expected = np.zeros( + [ + 3, + ], + dtype="float32", + ) d_dout_expected = np.ones([1], dtype="float32") * 3 + d_ddx_expected = np.zeros( + [ + 3, + ], + dtype="float32", + ) d_ddy_expected = np.ones( [ 3, @@ -1054,7 +1126,10 @@ class TestDygraphTripleGradMatmulcase5(TestCase): dtype="float32", ) expected_results = ( + d_x_expected, + d_y_expected, d_dout_expected, + d_ddx_expected, d_ddy_expected, ) @@ -1082,15 +1157,21 @@ class TestDygraphTripleGradMatmulcase5(TestCase): self.input_numpy_ddy = np.ones([1], dtype="float32") init_data() + d_x_expected = np.zeros([3, 1], dtype="float32") + d_y_expected = np.zeros([1], dtype="float32") d_dout_expected = np.ones( [ 3, ], dtype="float32", ) + d_ddx_expected = np.zeros([3, 1], dtype="float32") d_ddy_expected = 
np.ones([1], dtype="float32") * 3 expected_results = ( + d_x_expected, + d_y_expected, d_dout_expected, + d_ddx_expected, d_ddy_expected, ) @@ -1105,8 +1186,6 @@ class TestDygraphTripleGradMatmulcase5(TestCase): ) -''' -TODO(Ruting) test complex dtype when composite api support # d_ddout is none, d_dy is none, dtype is complex64 class TestDygraphTripleGradMatmulcase6(TestCase): def setUp(self): @@ -1253,7 +1332,7 @@ class TestDygraphTripleGradMatmulcase6(TestCase): np.testing.assert_allclose( expected_result, actual_result, rtol=1e-6 ) -''' + if __name__ == '__main__': unittest.main() -- GitLab
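
Below is a minimal, self-contained sketch (not part of the applied diff) of the double-grad pattern that the restored test_imperative_double_grad.py cases exercise: dx is differentiated with respect to all of [x, y, dout], and because dx = dout @ y^T does not depend on x, the restored expectation is a zero-filled gradient for x rather than a result pruned to None via the removed special_prune_dict / SetGradOutMeta overload. Shapes and all-ones values mirror the first restored 3x3 case; whether the unused-input gradient really comes back as zeros depends on the Paddle build carrying this revert, so treat the assertions as illustrative.

import numpy as np
import paddle

# Forward: out = x @ y, with all-ones inputs so expected values are easy to read off.
x = paddle.to_tensor(np.ones([3, 3]), stop_gradient=False, dtype='float32')
y = paddle.to_tensor(np.ones([3, 3]), stop_gradient=False, dtype='float32')
out = paddle.matmul(x, y)

# First-order grad with cotangent dout: dx = dout @ y^T.
dout = paddle.to_tensor(np.ones([3, 3]), stop_gradient=False, dtype='float32')
(dx,) = paddle.grad([out], [x], [dout], retain_graph=True, create_graph=True)

# Second-order grads of dx w.r.t. [x, y, dout].  dx has no dependence on x, so
# with the pruning rules reverted the test suite expects zeros here instead of
# the output being dropped from the backward graph.
ddx = paddle.to_tensor(np.ones([3, 3]), stop_gradient=False, dtype='float32')
dx_double_grad, dy_double_grad, ddout = paddle.grad(
    [dx], [x, y, dout], [ddx], retain_graph=True, create_graph=True
)

# Expectations mirroring the restored test: d(dx)/dx = 0, d(dx)/dy = ddx^T @ dout,
# d(dx)/d(dout) = ddx @ y; the latter two are all-threes for these inputs.
np.testing.assert_allclose(
    dx_double_grad.numpy(), np.zeros([3, 3], dtype="float32"), rtol=1e-6
)
np.testing.assert_allclose(
    dy_double_grad.numpy(), np.full([3, 3], 3.0, dtype="float32"), rtol=1e-6
)
np.testing.assert_allclose(
    ddout.numpy(), np.full([3, 3], 3.0, dtype="float32"), rtol=1e-6
)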