Unverified · Commit 25e765a4, authored by Leo Chen, committed by GitHub

Copy stop_gradient property of variable in clone (#21825)

* copy stop_gradient property in clone, test=develop

* fix pruning issue, test=develop

* fix kLookupTablePath issue, test=develop

* sync program, test=develop

* copy subblocks, test=develop

* add unittests, test=develop
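
In short, stop_gradient set on a variable now survives Program.clone(). A minimal sketch of the effect, not taken from the commit itself, written against the fluid 1.x static-graph API (the network and variable names are illustrative):

import paddle.fluid as fluid

train_program = fluid.Program()
startup_program = fluid.Program()
with fluid.program_guard(train_program, startup_program):
    img = fluid.layers.data(name='image', shape=[784])
    hidden = fluid.layers.fc(input=img, size=200, act='relu')
    hidden.stop_gradient = True  # freeze this part of the graph

cloned = train_program.clone(for_test=False)
# Before this commit the cloned variable fell back to stop_gradient=False;
# with it, the flag is carried over together with the other variable info.
print(cloned.block(0).var(hidden.name).stop_gradient)  # True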
Parent 013225bb
@@ -4456,17 +4456,25 @@ class Program(object):
             None
         """
         if not isinstance(other, Program):
-            raise TypeError("_copy_param_info_from should be invoked with "
+            raise TypeError("_copy_data_info_from should be invoked with "
                             "Program")

         if len(self.blocks) != len(other.blocks):
-            raise ValueError("_copy_param_info_from should be invoked with two "
+            raise ValueError("_copy_data_info_from should be invoked with two "
                              "program, with represent the same topology")
-        for var in list(other.global_block().vars.values()):
-            if var.is_data:
-                self.global_block().var(var.name).is_data = True
-            if var.desc.need_check_feed():
-                self.global_block().var(var.name).desc.set_need_check_feed(True)
+
+        # NOTE(zhiqiu): All vars in cloned program exist in original program.
+        # The reverse is not true, due to backward pruning.
+        for i, block in enumerate(other.blocks):
+            for var in list(block.vars.values()):
+                if not self.blocks[i].has_var(var.name):
+                    continue
+                if var.is_data:
+                    self.blocks[i].var(var.name).is_data = True
+                if var.desc.need_check_feed():
+                    self.blocks[i].var(var.name).desc.set_need_check_feed(True)
+                if var.stop_gradient:
+                    self.blocks[i].var(var.name).stop_gradient = True

     @dygraph_not_support
     def list_vars(self):
...
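To illustrate the pruning note above, a rough sketch (assuming the fluid 1.x API; not part of the commit) of the asymmetry between a program and its pruned clone, which is why the loop skips names missing from self.blocks[i]:

import paddle.fluid as fluid

# Illustrative only: clone(for_test=True) prunes backward/optimizer parts, so the
# clone's variable set should be a subset of the original's -- hence the has_var() check.
prog = fluid.Program()
with fluid.program_guard(prog, fluid.Program()):
    x = fluid.layers.data(name='x', shape=[4])
    loss = fluid.layers.mean(fluid.layers.fc(input=x, size=2))
    fluid.optimizer.SGD(learning_rate=0.01).minimize(loss)

test_prog = prog.clone(for_test=True)
orig_vars = set(prog.global_block().vars)
clone_vars = set(test_prog.global_block().vars)
print(clone_vars <= orig_vars)  # expected True, per the NOTE above
print(orig_vars - clone_vars)   # names only in the original (may be empty if nothing was pruned)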
@@ -268,6 +268,10 @@ def save_vars(executor,
                outputs={},
                attrs={'file_path': os.path.join(save_dirname, filename)})

+        #NOTE(zhiqiu): save op will add variable kLookupTablePath in save_program.desc,
+        # which leads to diff on save_program and its desc. Call _sync_with_cpp
+        # to keep consistency.
+        save_program._sync_with_cpp()
        executor.run(save_program)
...
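The same rule shows up in the test changes below: whenever a Program is modified at the desc (C++) level rather than through the Python API, _sync_with_cpp() restores consistency between the Python-side Program and its desc before it is executed. A small sketch of the pattern (assuming the fluid 1.x API; the tiny network here is made up):

import paddle.fluid as fluid
from paddle.fluid import core

prog = fluid.Program()
with fluid.program_guard(prog):
    x = fluid.layers.data(name='x', shape=[8], dtype='float32')
    y = fluid.layers.fc(input=x, size=4)

# Touch the underlying VarDesc directly, bypassing the Python wrappers...
block = prog.global_block()
block.desc.find_var(y.name.encode("ascii")).set_dtype(core.VarDesc.VarType.FP64)
# ...then re-sync so prog and prog.desc describe the same graph again.
prog._sync_with_cpp()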
@@ -470,6 +470,8 @@ class TestBatchNormOpTraining(unittest.TestCase):
                    grad_var = block.desc.find_var(arg.encode("ascii"))
                    grad_var.set_dtype(core.VarDesc.VarType.FP32)

+            program._sync_with_cpp()
+
            exe = fluid.Executor(place)
            out = exe.run(program,
                          feed={
...
@@ -194,10 +194,106 @@ class TestDistMnist(unittest.TestCase):
        startup_prog = t.get_startup_program(current_endpoint, pserver_prog)

        main = pserver_prog.clone()
        startup = startup_prog.clone()

        self.assertTrue(program_equal(main, pserver_prog))
        self.assertTrue(program_equal(startup, startup_prog))


+class TestCloneWithStopGradient(unittest.TestCase):
+    def test_clone_with_stop_gradient(self):
+        train_program = fluid.Program()
+        startup_program = fluid.Program()
+        with fluid.program_guard(train_program, startup_program):
+            img = fluid.layers.data(name='image', shape=[784])
+            hidden1 = fluid.layers.fc(input=img, size=200, act='relu')
+            hidden1.stop_gradient = True
+            hidden2 = fluid.layers.dropout(hidden1, dropout_prob=0.5)
+            loss = fluid.layers.cross_entropy(
+                input=fluid.layers.fc(hidden2, size=10, act='softmax'),
+                label=fluid.layers.data(
+                    name='label', shape=[1], dtype='int64'))
+            avg_loss = fluid.layers.mean(loss)
+
+        test_program = train_program.clone(for_test=False)
+        self.assertEqual(
+            test_program.block(0).var(hidden1.name).stop_gradient, True)
+        self.assertEqual(
+            test_program.block(0).var(hidden2.name).stop_gradient, False)
+
+
+class TestCloneWithStopGradientInSubBlock(unittest.TestCase):
+    def test_clone_with_stop_gradient(self):
+        train_program = fluid.Program()
+        startup_program = fluid.Program()
+        with fluid.program_guard(train_program, startup_program):
+            img = fluid.layers.data(name='image', shape=[784])
+            true = fluid.layers.ones(shape=[1], dtype="float32")
+            hidden1 = fluid.layers.fc(input=img, size=200, act='relu')
+            hidden1.stop_gradient = True
+
+            cond = fluid.layers.equal(true, true)
+
+            def true_fn():
+                hidden2 = fluid.layers.dropout(hidden1, dropout_prob=0.5)
+                hidden2.stop_gradient = True
+                return hidden2
+
+            def false_fn():
+                hidden2 = fluid.layers.dropout(hidden1, dropout_prob=0.6)
+                return hidden2
+
+            hidden2 = fluid.layers.cond(cond, true_fn, false_fn)
+
+            loss = fluid.layers.cross_entropy(
+                input=fluid.layers.fc(hidden2, size=10, act='softmax'),
+                label=fluid.layers.data(
+                    name='label', shape=[1], dtype='int64'))
+            avg_loss = fluid.layers.mean(loss)
+
+        test_program = train_program.clone(for_test=False)
+        self.assertEqual(
+            test_program.block(0).var(hidden1.name).stop_gradient, True)
+        for var in test_program.block(1).vars.values():
+            var2 = train_program.block(1).var(var.name)
+            self.assertEqual(var.stop_gradient, var2.stop_gradient)
+        for var in test_program.block(2).vars.values():
+            var2 = train_program.block(2).var(var.name)
+            self.assertEqual(var.stop_gradient, var2.stop_gradient)
+
+
+class TestCloneWithRaise(unittest.TestCase):
+    def test_clone_with_stop_gradient(self):
+        train_program = fluid.Program()
+        startup_program = fluid.Program()
+        with fluid.program_guard(train_program, startup_program):
+            img = fluid.layers.data(name='image', shape=[784])
+            true = fluid.layers.ones(shape=[1], dtype="float32")
+            hidden1 = fluid.layers.fc(input=img, size=200, act='relu')
+            hidden1.stop_gradient = True
+
+            cond = fluid.layers.equal(true, true)
+
+            def true_fn():
+                hidden2 = fluid.layers.dropout(hidden1, dropout_prob=0.5)
+                hidden2.stop_gradient = True
+                return hidden2
+
+            def false_fn():
+                hidden2 = fluid.layers.dropout(hidden1, dropout_prob=0.6)
+                return hidden2
+
+            hidden2 = fluid.layers.cond(cond, true_fn, false_fn)
+
+            loss = fluid.layers.cross_entropy(
+                input=fluid.layers.fc(hidden2, size=10, act='softmax'),
+                label=fluid.layers.data(
+                    name='label', shape=[1], dtype='int64'))
+            avg_loss = fluid.layers.mean(loss)
+
+        test_program = train_program.clone(for_test=False)
+        self.assertRaises(ValueError, train_program._copy_data_info_from,
+                          startup_program)
+        self.assertRaises(TypeError, train_program._copy_data_info_from,
+                          startup_program.block(0))
+

if __name__ == "__main__":
    unittest.main()
...
@@ -163,6 +163,8 @@ class TestInstanceNormOpTraining(unittest.TestCase):
                    grad_var = block.desc.find_var(arg.encode("ascii"))
                    grad_var.set_dtype(core.VarDesc.VarType.FP32)

+            program._sync_with_cpp()
+
            exe = fluid.Executor(place)
            out = exe.run(program,
                          feed={
...
@@ -148,6 +148,8 @@ class TestLayerNormOp(unittest.TestCase):
                    grad_var = block.desc.find_var(arg.encode("ascii"))
                    grad_var.set_dtype(core.VarDesc.VarType.FP32)

+            program._sync_with_cpp()
+
            exe = fluid.Executor(place)
            out = exe.run(program,
                          feed={
...