diff --git a/paddle/phi/kernels/gpu/add_n_kernel.cu b/paddle/phi/kernels/gpu/add_n_kernel.cu
index 69bc248a7e2f2c87b9e90d14e22e6f6496bc3223..8bf3591f13c4c7a8b35bbed8cb4ee1f192dafe8c 100644
--- a/paddle/phi/kernels/gpu/add_n_kernel.cu
+++ b/paddle/phi/kernels/gpu/add_n_kernel.cu
@@ -75,6 +75,14 @@ void AddNKernel(const Context &dev_ctx,
                 const std::vector<const TensorBase *> &x,
                 DenseTensor *out) {
   const size_t in_num = x.size();
+  // Reject uninitialized inputs up front: gradient aggregation may hand
+  // add_n an invalid (never-written) tensor, which would crash later.
+  // NOTE: size_t index avoids a signed/unsigned comparison with in_num.
+  for (size_t i = 0; i < in_num; ++i) {
+    PADDLE_ENFORCE_EQ(
+        x[i]->initialized(),
+        true,
+        phi::errors::InvalidArgument(
+            "The %d-th input tensor of add_n is uninitialized.",
+            static_cast<int>(i)));
+  }
   constexpr size_t theory_sm_threads = 1024;
   auto stream = dev_ctx.stream();
 
diff --git a/python/paddle/fluid/backward.py b/python/paddle/fluid/backward.py
index 1ba11e1fba4c3fecfa9d9e20594bf039ffe50d3a..f3bc2635c4fbcda3139973fa21b2ad804be9705f 100755
--- a/python/paddle/fluid/backward.py
+++ b/python/paddle/fluid/backward.py
@@ -1672,6 +1672,18 @@ def _append_backward_vars_(block, start_op_idx, grad_to_var, grad_info_map):
             ops_to_remove.append(op_idx)
             continue
 
+        # Gradient aggregation may build a `sum` op over variables that were
+        # never created in this block; keep only the inputs that exist.
+        if op_desc.type() == 'sum':
+            new_inputs = []
+            for grad_var_name in op_desc.input_arg_names():
+                if block.desc.has_var_recursive(grad_var_name.encode()):
+                    new_inputs.append(grad_var_name)
+            assert (
+                len(new_inputs) > 0
+            ), "After removing invalid variables, the sum op has no inputs."
+            op_desc.set_input("X", new_inputs)
+
         new_vars = set()
         # create new gradient variables
         for grad_var_name in op_desc.output_arg_names():
diff --git a/python/paddle/fluid/tests/unittests/test_backward.py b/python/paddle/fluid/tests/unittests/test_backward.py
index 750732eb5d90794401938759b8738331fb7ebb88..c3d44d7b343c5a9c2c053de8e3747211bbabab24 100644
--- a/python/paddle/fluid/tests/unittests/test_backward.py
+++ b/python/paddle/fluid/tests/unittests/test_backward.py
@@ -409,6 +409,42 @@ class ConditionalNet(BackwardNet):
     def __init__(self):
         super().__init__()
 
 
+class TestBackwardUninitializedVariable(unittest.TestCase):
+    """Regression test found in YOLOv5 under to_static: gradient
+    aggregation may feed `sum` an uninitialized variable.
+    """
+
+    def test(self):
+        paddle.enable_static()
+        main_prg, startup_prg = (
+            paddle.static.Program(),
+            paddle.static.Program(),
+        )
+        with paddle.static.program_guard(main_prg, startup_prg):
+            gt = paddle.static.data(name='gt', shape=[4], dtype='float32')
+            x = paddle.static.data(name='x', shape=[2], dtype='float32')
+            gt.stop_gradient = True
+            x.stop_gradient = False
+            gt = gt.reshape([4, 1]).reshape([4])
+            loss = (
+                paddle.nn.functional.binary_cross_entropy(x, gt[:2])
+                + (gt[2:4] * x).sum()
+            )
+            exe = paddle.static.Executor()
+            # Must not raise even though some aggregated grads are invalid.
+            paddle.fluid.backward.gradients(loss, [])
+            exe.run(startup_prg)
+            out = exe.run(
+                main_prg,
+                feed={
+                    'gt': np.array([1.0, 1.0, 0.0, 0.0], dtype='float32'),
+                    'x': np.array([0.5, 0.5], dtype='float32'),
+                },
+                fetch_list=[loss],
+            )
+            # loss = BCE([.5,.5], [1,1]) + ([0,0]*x).sum() = ln(2) + 0
+            np.testing.assert_allclose(out[0], np.log(2.0), rtol=1e-5)
+
+
 if __name__ == '__main__':
     paddle.enable_static()
     unittest.main()