未验证 提交 20a84820 编写于 作者: A Aurelius84 提交者: GitHub

fix unused var with zero gradient bug in fluid.gradient (#27246)

* fix calc_gradient

* fix code place

* fix embedding interface usage
上级 33ff833a
......@@ -1756,6 +1756,12 @@ def calc_gradient(targets, inputs, target_gradients=None, no_grad_set=None):
op_path_dict = dict()
op_path = _find_op_path_(block, targets, inputs, block_no_grad_set,
op_path_dict)
# find no grad var by op_path
no_grad_vars = _find_no_grad_vars(block, op_path, targets,
block_no_grad_set)
block_no_grad_set.update(no_grad_vars)
no_grad_dict[0].update(list(map(_append_grad_suffix_, block_no_grad_set)))
grad_to_var = dict()
grad_info_map = dict()
......
......@@ -14,6 +14,7 @@
from __future__ import print_function
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.layers.utils import flatten
from paddle.fluid.dygraph import declarative, ProgramTranslator
......@@ -151,5 +152,33 @@ class TestWithTrainAndEval(unittest.TestCase):
partial_layer._train_program)
class GPT2LMHeadModel(fluid.dygraph.Layer):
    """Toy declarative-mode model for the unused-parameter pruning test.

    The two embeddings and ``lm_head_weight`` are created but never touched
    by ``forward``, so they should be prunable from the traced program.
    """

    def __init__(self):
        super(GPT2LMHeadModel, self).__init__()
        # Deliberately unused by forward() — the test checks these can be pruned.
        self.embedding0 = paddle.nn.Embedding(20, 16)
        self.embedding1 = paddle.nn.Embedding(20, 32)
        self.lm_head_weight = paddle.to_tensor(
            np.random.rand(2, 3).astype('float32'))

    @declarative
    def forward(self, x):
        # Flatten input into rows of 6, split each row into 3 equal chunks
        # along dim 1, and return only the first chunk (width 2).
        rows = fluid.layers.reshape(x, shape=[-1, 6])
        first_chunk, _unused_a, _unused_b = fluid.layers.split(
            input=rows, dim=1, num_or_sections=3)
        return first_chunk
class TestPruneUnusedParamInProgram(unittest.TestCase):
    """Verify a model with unused parameters still runs under @declarative."""

    def test_prune(self):
        place = fluid.CPUPlace()
        with fluid.dygraph.guard(place):
            model = GPT2LMHeadModel()
            model.eval()
            feed = np.array([[15, 11, 6, 3, 18, 13]]).astype("float32")
            out = model(paddle.to_tensor(feed))
            # forward() keeps only the first 2 of the 6 input columns.
            self.assertTrue(np.array_equal(out.numpy(), [[15, 11]]))
# Allow running this test file directly.
if __name__ == '__main__':
    unittest.main()
......@@ -15,7 +15,7 @@
from __future__ import print_function
import unittest
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from paddle.fluid.backward import calc_gradient
......@@ -81,5 +81,22 @@ class TestDoubleGrad(unittest.TestCase):
self.assertEqual(12, out[0])
class TestGradientWithPrune(unittest.TestCase):
    """fluid.gradients with a partially used input: unused slices get zero grad."""

    def test_prune(self):
        # Build a program where only the first slice of x contributes to y.
        x = fluid.data(name='x', shape=[3], dtype='float32')
        x.stop_gradient = False
        first, _mid, _last = fluid.layers.split(x, dim=0, num_or_sections=3)
        y = first * 2
        x_grad = fluid.gradients(y, x)

        main_prog = fluid.default_main_program()
        exe = fluid.Executor(fluid.CPUPlace())
        exe.run(fluid.default_startup_program())
        feed_x = np.ones([3]).astype('float32')
        result = exe.run(main_prog,
                         feed={'x': feed_x},
                         fetch_list=[x_grad])
        # d(y)/d(x) is 2 for the used slice, and 0 for the unused ones.
        self.assertTrue(np.array_equal(result[0], [2., 0., 0.]))
# Allow running this test file directly.
if __name__ == "__main__":
    unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册