diff --git a/paddle/fluid/imperative/basic_engine.cc b/paddle/fluid/imperative/basic_engine.cc
index 896918a607106380158ca9b9d40cdf5e5ad990f5..7bcc3d6c608c947f71ae030cfb17d4a89495939e 100644
--- a/paddle/fluid/imperative/basic_engine.cc
+++ b/paddle/fluid/imperative/basic_engine.cc
@@ -471,12 +471,20 @@ void BasicEngine::Execute() {
     {
       VLOG(3) << "Start to execute grad op " << cur_op.Type();
-      if (tmp_ins_ptr == nullptr) {
-        OpBase::Run(cur_op.InnerOp(), bwd_ins, tmp_outs, cur_op.Attrs(),
-                    cur_op.place());
-      } else {
-        OpBase::Run(cur_op.InnerOp(), *tmp_ins_ptr, tmp_outs, cur_op.Attrs(),
-                    cur_op.place());
+      try {
+        if (tmp_ins_ptr == nullptr) {
+          OpBase::Run(cur_op.InnerOp(), bwd_ins, tmp_outs, cur_op.Attrs(),
+                      cur_op.place());
+        } else {
+          OpBase::Run(cur_op.InnerOp(), *tmp_ins_ptr, tmp_outs,
+                      cur_op.Attrs(), cur_op.place());
+        }
+      } catch (platform::EnforceNotMet& exception) {
+        Clear();
+        throw std::move(exception);
+      } catch (std::exception& ex) {
+        Clear();
+        PADDLE_THROW(platform::errors::External("%s", ex.what()));
       }
     }
diff --git a/paddle/fluid/imperative/py_layer_fwd.h b/paddle/fluid/imperative/py_layer_fwd.h
index bd132f2576fec14511523958d4ce64077b99b1f1..ccfd5b0e2dbfcd28692e899f17566cb4b6cf9344 100644
--- a/paddle/fluid/imperative/py_layer_fwd.h
+++ b/paddle/fluid/imperative/py_layer_fwd.h
@@ -115,12 +115,12 @@ py::object PyLayerApply(const platform::Place& place, const py::object& cls,
                 tuple_result[i].cast<std::shared_ptr<imperative::VarBase>>();
             output_vars.push_back(temp_out);
           } catch (py::cast_error&) {
-            PADDLE_THROW(platform::errors::Unimplemented(
-                "The output of `PyLayer.forward` should be `Tensor`."));
+            // Only collect `Tensor` outputs of `PyLayer.forward` and pass
+            // them to backward; ignore other output types for now.
           }
         } else {
-          PADDLE_THROW(platform::errors::Unimplemented(
-              "The output of `PyLayer.forward` can not be `None`."));
+          // Only collect `Tensor` outputs of `PyLayer.forward` and pass
+          // them to backward; ignore other output types for now.
         }
       }
     } else {
@@ -130,14 +130,18 @@ py::object PyLayerApply(const platform::Place& place, const py::object& cls,
               result_forward.cast<std::shared_ptr<imperative::VarBase>>();
           output_vars.push_back(temp_out);
         } catch (py::cast_error&) {
-          PADDLE_THROW(platform::errors::Unimplemented(
-              "The output of `PyLayer.forward` should be `Tensor`."));
+          // Only collect `Tensor` outputs of `PyLayer.forward` and pass
+          // them to backward; ignore other output types for now.
        }
      } else {
-        PADDLE_THROW(platform::errors::Unimplemented(
-            "The output of `PyLayer.forward` can not be `None`."));
+        // Only collect `Tensor` outputs of `PyLayer.forward` and pass
+        // them to backward; ignore other output types for now.
      }
    }
+    if (output_vars.size() == 0) {
+      PADDLE_THROW(platform::errors::InvalidArgument(
+          "At least one output of `PyLayer.forward` should be a `Tensor`."));
+    }

    NameVarBaseMap outs = {{"Out", output_vars}};
diff --git a/paddle/fluid/operators/py_layer_op.cc b/paddle/fluid/operators/py_layer_op.cc
index 65e10181dcc3df06395ae5cae65efb251021857e..0090747d1161a4c02fc67407d92b99aff5faec30 100644
--- a/paddle/fluid/operators/py_layer_op.cc
+++ b/paddle/fluid/operators/py_layer_op.cc
@@ -86,6 +86,12 @@ void RunPyObject(py::object *py_object,
       }
     }
   } else {
+    if (1 != outs->size()) {
+      PADDLE_THROW(platform::errors::InvalidArgument(
+          "The number of outputs of `PyLayer.backward` should be %d, but "
+          "received 1.",
+          outs->size()));
+    }
     if ((*outs)[0] != nullptr) {
       if (Py_None != py_result.ptr()) {
         try {
diff --git a/python/paddle/fluid/tests/unittests/test_pylayer_op.py b/python/paddle/fluid/tests/unittests/test_pylayer_op.py
index d329bf570a5845c7c261e53e9bd0c064a908ae09..e058115d691993781d7f6d0fb9aa20b633ab60d9 100644
--- a/python/paddle/fluid/tests/unittests/test_pylayer_op.py
+++ b/python/paddle/fluid/tests/unittests/test_pylayer_op.py
@@ -30,7 +30,7 @@ class TestPyLayer(unittest.TestCase):
                 y1 = func1(x1)
                 y2 = func1(x2)
                 ctx.save_for_backward(y1, y2)
-                return y1, y2
+                return y1, 1, y2, None

             @staticmethod
             def backward(ctx, dy1, dy2):
@@ -44,7 +44,7 @@ class TestPyLayer(unittest.TestCase):
         input1.stop_gradient = False
         input2.stop_gradient = False
         z = tanh.apply(input1, input1, paddle.tanh, paddle.square)
-        z = z[0] + z[1]
+        z = z[0] + z[2]
         z.mean().backward()

         z2 = paddle.tanh(input2) + paddle.tanh(input2)
@@ -61,7 +61,7 @@ class TestPyLayer(unittest.TestCase):
                 y1 = func1(x1)
                 y2 = func1(x2)
                 ctx.save_for_backward(y1, y2)
-                return y1, y2
+                return 1, None, y1, y2, ''

             @staticmethod
             def backward(ctx, dy1, dy2):
@@ -79,7 +79,7 @@ class TestPyLayer(unittest.TestCase):
         input3.stop_gradient = True
         input4.stop_gradient = True
         z = tanh.apply(input1, input3, paddle.tanh, paddle.square)
-        z = z[0] + z[1]
+        z = z[2] + z[3]
         z.mean().backward()

         z2 = paddle.tanh(input2) + paddle.tanh(input4)
@@ -115,6 +115,27 @@ class TestPyLayer(unittest.TestCase):
         self.assertTrue(
             np.max(np.abs((input1.grad.numpy() - input2.grad.numpy()))) <
             1e-10)

+    def test_pylayer_num_output_match(self):
+        class tanh(PyLayer):
+            @staticmethod
+            def forward(
+                    ctx,
+                    x1,
+                    x2, ):
+                return x1 + x2
+
+            @staticmethod
+            def backward(ctx, dy1):
+                return dy1 + 1
+
+        input1 = paddle.randn([2, 3]).astype("float64")
+        input2 = input1.detach().clone()
+        input1.stop_gradient = False
+        input2.stop_gradient = False
+        z = tanh.apply(input1, input2)
+        with self.assertRaises(ValueError):
+            z.mean().backward()
+
     def test_pylayer_dtype(self):
         class tanh(PyLayer):
             @staticmethod
@@ -150,21 +171,21 @@ class TestPyLayer(unittest.TestCase):
                 return args

         input1 = paddle.randn([2, 3]).astype("float64")
-        with self.assertRaises(NotImplementedError):
+        with self.assertRaises(ValueError):
             z = Layer_None1.apply(input1)

         class Layer_None2(PyLayer):
             @staticmethod
             def forward(ctx, *args):
-                return [None, None]
+                return [None, args[0]]

             @staticmethod
             def backward(ctx, *args):
                 return args

         input1 = paddle.randn([2, 3]).astype("float64")
-        with self.assertRaises(NotImplementedError):
-            z = Layer_None2.apply(input1)
+        # `None` outputs are now ignored, so this no longer raises.
+        z = Layer_None2.apply(input1)

         class Layer_one1(PyLayer):
             @staticmethod
             def forward(ctx, *args):
                 return 1

             @staticmethod
             def backward(ctx, *args):
                 return args

         input1 = paddle.randn([2, 3]).astype("float64")
-        with self.assertRaises(NotImplementedError):
+        # At least one output of `PyLayer.forward` must be a `Tensor`.
+        with self.assertRaises(ValueError):
             z = Layer_one1.apply(input1)

         class Layer_one2(PyLayer):
             @staticmethod
             def forward(ctx, *args):
-                return [1, 2]
+                return [1, 2, args[0]]

             @staticmethod
             def backward(ctx, *args):
                 return args

         input1 = paddle.randn([2, 3]).astype("float64")
-        with self.assertRaises(NotImplementedError):
-            z = Layer_one2.apply(input1)
+        # `int` outputs are now ignored, so this no longer raises.
+        z = Layer_one2.apply(input1)

         class Layer_no_fw(PyLayer):
             @staticmethod
@@ -234,8 +256,7 @@ class TestPyLayer(unittest.TestCase):
         z = Layer_bk_none1.apply(input2)

         with self.assertRaises(ValueError):
-            with paddle.fluid.dygraph.guard():
-                z.sum().backward()
+            z.sum().backward()

         class Layer_bk_none2(PyLayer):
             @staticmethod
@@ -249,9 +270,9 @@ class TestPyLayer(unittest.TestCase):
         input1 = paddle.randn([2, 3]).astype("float64")
         input1.stop_gradient = False
         z = Layer_bk_none2.apply(input1, input1)
+
         with self.assertRaises(ValueError):
-            with paddle.fluid.dygraph.guard():
-                z.mean().backward()
+            z.mean().backward()

         class Layer_bk_one1(PyLayer):
             @staticmethod
@@ -265,9 +286,9 @@ class TestPyLayer(unittest.TestCase):
         input1 = paddle.randn([2, 3]).astype("float64")
         input1.stop_gradient = False
         z = Layer_bk_one1.apply(input1)
+
         with self.assertRaises(ValueError):
-            with paddle.fluid.dygraph.guard():
-                z.mean().backward()
+            z.mean().backward()

         class Layer_bk_one2(PyLayer):
             @staticmethod
@@ -280,11 +301,11 @@ class TestPyLayer(unittest.TestCase):
         input1 = paddle.randn([2, 3]).astype("float64")
         input1.stop_gradient = False
+
         y = Layer_bk_one2.apply(input1, input1)
         z = y[0] + y[1]
         with self.assertRaises(ValueError):
-            with paddle.fluid.dygraph.guard():
-                z.mean().backward()
+            z.mean().backward()

         class Layer_no_bk(PyLayer):
             @staticmethod
@@ -295,10 +316,9 @@ class TestPyLayer(unittest.TestCase):
         input1.stop_gradient = False
         z = Layer_no_bk.apply(input1)

-        with self.assertRaises(NotImplementedError):
-            with paddle.fluid.dygraph.guard():
-                z = z[0] + z[1]
-                z.mean().backward()
+        with self.assertRaises(OSError):
+            z = z[0] + z[1]
+            z.mean().backward()

         class Layer_bk_match(PyLayer):
             @staticmethod
@@ -313,9 +333,8 @@ class TestPyLayer(unittest.TestCase):
         input1.stop_gradient = False
         z = Layer_bk_match.apply(input1)
         with self.assertRaises(ValueError):
-            with paddle.fluid.dygraph.guard():
-                z = z[0] + z[1]
-                z.mean().backward()
+            z = z[0] + z[1]
+            z.mean().backward()

     def test_pylayer_bk_return_none(self):
         class Layer_bk_none1(PyLayer):
@@ -334,8 +353,7 @@ class TestPyLayer(unittest.TestCase):
         z = Layer_bk_none1.apply(input1, input2)

         with self.assertRaises(ValueError):
-            with paddle.fluid.dygraph.guard():
-                z.mean().backward()
+            z.mean().backward()

         class Layer_bk_none2(PyLayer):
             @staticmethod
@@ -353,8 +371,7 @@ class TestPyLayer(unittest.TestCase):
         z = Layer_bk_none2.apply(input1, input2)
         z = z[0] + z[1]
         with self.assertRaises(ValueError):
-            with paddle.fluid.dygraph.guard():
-                z.mean().backward()
+            z.mean().backward()

     def test_pylayer_inplace(self):
         class cus_tanh(PyLayer):
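
Reviewer note: below is a minimal sketch of the behavior this patch enables, adapted from the new test cases above. The layer name, the import path, and the tensor shape are illustrative assumptions, not part of the patch; `apply()` returns the forward outputs as-is, while gradients flow only through the `Tensor` entries.

    import paddle
    from paddle.autograd import PyLayer  # import path assumed from the test file's usage

    class MixedOutputs(PyLayer):  # hypothetical layer mirroring the updated tests
        @staticmethod
        def forward(ctx, x):
            y = paddle.tanh(x)
            ctx.save_for_backward(y)
            # Non-Tensor outputs (int, None, str) are now tolerated,
            # as long as at least one output is a Tensor.
            return 1, None, y

        @staticmethod
        def backward(ctx, dy):
            # One grad per Tensor output of forward; returning the wrong
            # number of grads now raises ValueError at backward time.
            y, = ctx.saved_tensor()
            return dy * (1 - paddle.square(y))

    x = paddle.randn([2, 3])
    x.stop_gradient = False
    out = MixedOutputs.apply(x)  # full tuple: out[0] == 1, out[1] is None
    out[2].mean().backward()     # grads flow only through the Tensor output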