Unverified commit 20a84820, authored by Aurelius84, committed via GitHub

fix unused var with zero gradient bug in fluid.gradient (#27246)

* fix calcu_gradients

* fix code place

* fix embedding interface usage
Parent commit: 33ff833a
...@@ -1756,6 +1756,12 @@ def calc_gradient(targets, inputs, target_gradients=None, no_grad_set=None): ...@@ -1756,6 +1756,12 @@ def calc_gradient(targets, inputs, target_gradients=None, no_grad_set=None):
op_path_dict = dict() op_path_dict = dict()
op_path = _find_op_path_(block, targets, inputs, block_no_grad_set, op_path = _find_op_path_(block, targets, inputs, block_no_grad_set,
op_path_dict) op_path_dict)
# find no grad var by op_path
no_grad_vars = _find_no_grad_vars(block, op_path, targets,
block_no_grad_set)
block_no_grad_set.update(no_grad_vars)
no_grad_dict[0].update(list(map(_append_grad_suffix_, block_no_grad_set))) no_grad_dict[0].update(list(map(_append_grad_suffix_, block_no_grad_set)))
grad_to_var = dict() grad_to_var = dict()
grad_info_map = dict() grad_info_map = dict()
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
from __future__ import print_function from __future__ import print_function
import numpy as np import numpy as np
import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.layers.utils import flatten from paddle.fluid.layers.utils import flatten
from paddle.fluid.dygraph import declarative, ProgramTranslator from paddle.fluid.dygraph import declarative, ProgramTranslator
...@@ -151,5 +152,33 @@ class TestWithTrainAndEval(unittest.TestCase): ...@@ -151,5 +152,33 @@ class TestWithTrainAndEval(unittest.TestCase):
partial_layer._train_program) partial_layer._train_program)
class GPT2LMHeadModel(fluid.dygraph.Layer):
    """Toy layer whose embedding sub-layers are deliberately unused.

    ``embedding0``, ``embedding1`` and ``lm_head_weight`` never take part in
    ``forward``; they exist so that the dygraph-to-static translation path
    has unused parameters available for pruning.
    """

    def __init__(self):
        super(GPT2LMHeadModel, self).__init__()
        # Unused on purpose — candidates for pruning in the translated program.
        self.embedding0 = paddle.nn.Embedding(20, 16)
        self.embedding1 = paddle.nn.Embedding(20, 32)
        self.lm_head_weight = paddle.to_tensor(
            np.random.rand(2, 3).astype('float32'))

    @declarative
    def forward(self, x):
        # Flatten to rows of 6 elements, split into three 2-wide chunks
        # along dim 1, and keep only the first chunk.
        flattened = fluid.layers.reshape(x, shape=[-1, 6])
        first, second, third = fluid.layers.split(
            input=flattened, dim=1, num_or_sections=3)
        return first
class TestPruneUnusedParamInProgram(unittest.TestCase):
    """A declarative model holding unused parameters must still run and
    produce the same result as eager execution."""

    def test_prune(self):
        raw_ids = np.array([[15, 11, 6, 3, 18, 13]]).astype("float32")
        with fluid.dygraph.guard(fluid.CPUPlace()):
            model = GPT2LMHeadModel()
            model.eval()
            out = model(paddle.to_tensor(raw_ids))
            # forward keeps only the first 2-column chunk of the input row.
            self.assertTrue(np.array_equal(out.numpy(), [[15, 11]]))
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
from __future__ import print_function from __future__ import print_function
import unittest import unittest
import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.fluid.layers as layers import paddle.fluid.layers as layers
from paddle.fluid.backward import calc_gradient from paddle.fluid.backward import calc_gradient
...@@ -81,5 +81,22 @@ class TestDoubleGrad(unittest.TestCase): ...@@ -81,5 +81,22 @@ class TestDoubleGrad(unittest.TestCase):
self.assertEqual(12, out[0]) self.assertEqual(12, out[0])
class TestGradientWithPrune(unittest.TestCase):
    """fluid.gradients must fill zero gradients for split outputs that do
    not contribute to the target (regression test for PaddlePaddle #27246)."""

    def test_prune(self):
        x = fluid.data(name='x', shape=[3], dtype='float32')
        x.stop_gradient = False
        # Only the first slice feeds the loss; the other two are unused.
        first, _, _ = fluid.layers.split(x, dim=0, num_or_sections=3)
        y = first * 2
        grads = fluid.gradients(y, x)

        exe = fluid.Executor(fluid.CPUPlace())
        exe.run(fluid.default_startup_program())
        result = exe.run(fluid.default_main_program(),
                         feed={'x': np.ones([3]).astype('float32')},
                         fetch_list=[grads])
        # x[0] contributes with factor 2; unused slots must be exactly zero.
        self.assertTrue(np.array_equal(result[0], [2., 0., 0.]))
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
To comment, please register.