diff --git a/python/paddle/fluid/backward.py b/python/paddle/fluid/backward.py
index 898c7d295641863740288e3f4e1da39266bce183..d51cacd1a5cad53ef77b325e5380100c537e057e 100644
--- a/python/paddle/fluid/backward.py
+++ b/python/paddle/fluid/backward.py
@@ -1756,6 +1756,12 @@ def calc_gradient(targets, inputs, target_gradients=None, no_grad_set=None):
     op_path_dict = dict()
     op_path = _find_op_path_(block, targets, inputs, block_no_grad_set,
                              op_path_dict)
+
+    # find no grad var by op_path
+    no_grad_vars = _find_no_grad_vars(block, op_path, targets,
+                                      block_no_grad_set)
+    block_no_grad_set.update(no_grad_vars)
+
     no_grad_dict[0].update(list(map(_append_grad_suffix_, block_no_grad_set)))
     grad_to_var = dict()
     grad_info_map = dict()
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_partial_program.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_partial_program.py
index f0fbe54f9dbbf93121655e784601467c13b3a70d..91067f360995e1661c200df923a698f3f146b71e 100644
--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_partial_program.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_partial_program.py
@@ -14,6 +14,7 @@
 from __future__ import print_function
 
 import numpy as np
+import paddle
 import paddle.fluid as fluid
 from paddle.fluid.layers.utils import flatten
 from paddle.fluid.dygraph import declarative, ProgramTranslator
@@ -151,5 +152,33 @@ class TestWithTrainAndEval(unittest.TestCase):
                              partial_layer._train_program)
 
 
+class GPT2LMHeadModel(fluid.dygraph.Layer):
+    def __init__(self):
+        super(GPT2LMHeadModel, self).__init__()
+        self.embedding0 = paddle.nn.Embedding(20, 16)
+        self.embedding1 = paddle.nn.Embedding(20, 32)
+        self.lm_head_weight = paddle.to_tensor(
+            np.random.rand(2, 3).astype('float32'))
+
+    @declarative
+    def forward(self, x):
+        x = fluid.layers.reshape(x, shape=[-1, 6])
+        x1, x2, x3 = fluid.layers.split(input=x, dim=1, num_or_sections=3)
+        return x1
+
+
+class TestPruneUnusedParamInProgram(unittest.TestCase):
+    def test_prune(self):
+        input_ids = np.array([[15, 11, 6, 3, 18, 13]]).astype("float32")
+
+        place = fluid.CPUPlace()
+        with fluid.dygraph.guard(place):
+            model = GPT2LMHeadModel()
+            model.eval()
+            input_ids = paddle.to_tensor(input_ids)
+            out = model(input_ids)
+            self.assertTrue(np.array_equal(out.numpy(), [[15, 11]]))
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_calc_gradient.py b/python/paddle/fluid/tests/unittests/test_calc_gradient.py
index 3e8c449d8995ca90401861e93f2fb987d1c6967d..fdfaf6a3113bbb9a50a79de7ef4ac4c3251d5759 100644
--- a/python/paddle/fluid/tests/unittests/test_calc_gradient.py
+++ b/python/paddle/fluid/tests/unittests/test_calc_gradient.py
@@ -15,7 +15,7 @@
 from __future__ import print_function
 
 import unittest
-
+import numpy as np
 import paddle.fluid as fluid
 import paddle.fluid.layers as layers
 from paddle.fluid.backward import calc_gradient
@@ -81,5 +81,22 @@ class TestDoubleGrad(unittest.TestCase):
         self.assertEqual(12, out[0])
 
 
+class TestGradientWithPrune(unittest.TestCase):
+    def test_prune(self):
+        x = fluid.data(name='x', shape=[3], dtype='float32')
+        x.stop_gradient = False
+        x1, x2, x3 = fluid.layers.split(x, dim=0, num_or_sections=3)
+        y = x1 * 2
+        x1_grad = fluid.gradients(y, x)
+
+        exe = fluid.Executor(fluid.CPUPlace())
+        main = fluid.default_main_program()
+        exe.run(fluid.default_startup_program())
+        out = exe.run(main,
+                      feed={'x': np.ones([3]).astype('float32')},
+                      fetch_list=[x1_grad])
+        self.assertTrue(np.array_equal(out[0], [2., 0., 0.]))
+
+
 if __name__ == "__main__":
     unittest.main()