diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py
index 10b92bdb7b90326a9f188e219c8ae2c5524cb660..e3d764b892fc209a9339faea9ac9b15032d33dd5 100644
--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -4456,17 +4456,25 @@ class Program(object):
             None
         """
         if not isinstance(other, Program):
-            raise TypeError("_copy_param_info_from should be invoked with "
+            raise TypeError("_copy_data_info_from should be invoked with "
                             "Program")
 
         if len(self.blocks) != len(other.blocks):
-            raise ValueError("_copy_param_info_from should be invoked with two "
+            raise ValueError("_copy_data_info_from should be invoked with two "
                              "program, with represent the same topology")
-        for var in list(other.global_block().vars.values()):
-            if var.is_data:
-                self.global_block().var(var.name).is_data = True
-            if var.desc.need_check_feed():
-                self.global_block().var(var.name).desc.set_need_check_feed(True)
+
+        # NOTE(zhiqiu): All vars in cloned program exist in original program.
+        # The reverse is not true, due to backward pruning.
+        for i, block in enumerate(other.blocks):
+            for var in list(block.vars.values()):
+                if not self.blocks[i].has_var(var.name):
+                    continue
+                if var.is_data:
+                    self.blocks[i].var(var.name).is_data = True
+                if var.desc.need_check_feed():
+                    self.blocks[i].var(var.name).desc.set_need_check_feed(True)
+                if var.stop_gradient:
+                    self.blocks[i].var(var.name).stop_gradient = True
 
     @dygraph_not_support
     def list_vars(self):
diff --git a/python/paddle/fluid/io.py b/python/paddle/fluid/io.py
index eed3b79591c09b07923a8f290173fe4585bec2c4..6ca49eb335369e04d8fa8a65b3899b6b6817fa79 100644
--- a/python/paddle/fluid/io.py
+++ b/python/paddle/fluid/io.py
@@ -268,6 +268,10 @@ def save_vars(executor,
                 outputs={},
                 attrs={'file_path': os.path.join(save_dirname, filename)})
 
+        #NOTE(zhiqiu): save op will add variable kLookupTablePath in save_program.desc,
+        # which leads to diff on save_program and its desc. Call _sync_with_cpp
+        # to keep consistency.
+        save_program._sync_with_cpp()
         executor.run(save_program)
 
 
diff --git a/python/paddle/fluid/tests/unittests/test_batch_norm_op.py b/python/paddle/fluid/tests/unittests/test_batch_norm_op.py
index e25a09385c28ef81a45d099bc16939293ee5a696..519ec1e4ab026873a54f5e245d0bec49f9010149 100644
--- a/python/paddle/fluid/tests/unittests/test_batch_norm_op.py
+++ b/python/paddle/fluid/tests/unittests/test_batch_norm_op.py
@@ -470,6 +470,8 @@ class TestBatchNormOpTraining(unittest.TestCase):
                     grad_var = block.desc.find_var(arg.encode("ascii"))
                     grad_var.set_dtype(core.VarDesc.VarType.FP32)
 
+                program._sync_with_cpp()
+
                 exe = fluid.Executor(place)
                 out = exe.run(program,
                               feed={
diff --git a/python/paddle/fluid/tests/unittests/test_desc_clone.py b/python/paddle/fluid/tests/unittests/test_desc_clone.py
index 82e704169e4b828549dab4c47b0fa46d9afd8f7e..e25de27b972a583e8dacc92210723e679019810b 100644
--- a/python/paddle/fluid/tests/unittests/test_desc_clone.py
+++ b/python/paddle/fluid/tests/unittests/test_desc_clone.py
@@ -194,10 +194,106 @@ class TestDistMnist(unittest.TestCase):
         startup_prog = t.get_startup_program(current_endpoint, pserver_prog)
         main = pserver_prog.clone()
         startup = startup_prog.clone()
-
         self.assertTrue(program_equal(main, pserver_prog))
         self.assertTrue(program_equal(startup, startup_prog))
 
 
+class TestCloneWithStopGradient(unittest.TestCase):
+    def test_clone_with_stop_gradient(self):
+        train_program = fluid.Program()
+        startup_program = fluid.Program()
+        with fluid.program_guard(train_program, startup_program):
+            img = fluid.layers.data(name='image', shape=[784])
+            hidden1 = fluid.layers.fc(input=img, size=200, act='relu')
+            hidden1.stop_gradient = True
+            hidden2 = fluid.layers.dropout(hidden1, dropout_prob=0.5)
+            loss = fluid.layers.cross_entropy(
+                input=fluid.layers.fc(hidden2, size=10, act='softmax'),
+                label=fluid.layers.data(
+                    name='label', shape=[1], dtype='int64'))
+            avg_loss = fluid.layers.mean(loss)
+            test_program = train_program.clone(for_test=False)
+
+        self.assertEqual(
+            test_program.block(0).var(hidden1.name).stop_gradient, True)
+        self.assertEqual(
+            test_program.block(0).var(hidden2.name).stop_gradient, False)
+
+
+class TestCloneWithStopGradientInSubBlock(unittest.TestCase):
+    def test_clone_with_stop_gradient(self):
+        train_program = fluid.Program()
+        startup_program = fluid.Program()
+        with fluid.program_guard(train_program, startup_program):
+            img = fluid.layers.data(name='image', shape=[784])
+            true = fluid.layers.ones(shape=[1], dtype="float32")
+            hidden1 = fluid.layers.fc(input=img, size=200, act='relu')
+            hidden1.stop_gradient = True
+
+            cond = fluid.layers.equal(true, true)
+
+            def true_fn():
+                hidden2 = fluid.layers.dropout(hidden1, dropout_prob=0.5)
+                hidden2.stop_gradient = True
+                return hidden2
+
+            def false_fn():
+                hidden2 = fluid.layers.dropout(hidden1, dropout_prob=0.6)
+                return hidden2
+
+            hidden2 = fluid.layers.cond(cond, true_fn, false_fn)
+
+            loss = fluid.layers.cross_entropy(
+                input=fluid.layers.fc(hidden2, size=10, act='softmax'),
+                label=fluid.layers.data(
+                    name='label', shape=[1], dtype='int64'))
+            avg_loss = fluid.layers.mean(loss)
+            test_program = train_program.clone(for_test=False)
+
+        self.assertEqual(
+            test_program.block(0).var(hidden1.name).stop_gradient, True)
+        for var in test_program.block(1).vars.values():
+            var2 = train_program.block(1).var(var.name)
+            self.assertEqual(var.stop_gradient, var2.stop_gradient)
+        for var in test_program.block(2).vars.values():
+            var2 = train_program.block(2).var(var.name)
+            self.assertEqual(var.stop_gradient, var2.stop_gradient)
+
+
+class TestCloneWithRaise(unittest.TestCase):
+    def test_clone_with_stop_gradient(self):
+        train_program = fluid.Program()
+        startup_program = fluid.Program()
+        with fluid.program_guard(train_program, startup_program):
+            img = fluid.layers.data(name='image', shape=[784])
+            true = fluid.layers.ones(shape=[1], dtype="float32")
+            hidden1 = fluid.layers.fc(input=img, size=200, act='relu')
+            hidden1.stop_gradient = True
+
+            cond = fluid.layers.equal(true, true)
+
+            def true_fn():
+                hidden2 = fluid.layers.dropout(hidden1, dropout_prob=0.5)
+                hidden2.stop_gradient = True
+                return hidden2
+
+            def false_fn():
+                hidden2 = fluid.layers.dropout(hidden1, dropout_prob=0.6)
+                return hidden2
+
+            hidden2 = fluid.layers.cond(cond, true_fn, false_fn)
+            loss = fluid.layers.cross_entropy(
+                input=fluid.layers.fc(hidden2, size=10, act='softmax'),
+                label=fluid.layers.data(
+                    name='label', shape=[1], dtype='int64'))
+            avg_loss = fluid.layers.mean(loss)
+            test_program = train_program.clone(for_test=False)
+
+        self.assertRaises(ValueError, train_program._copy_data_info_from,
+                          startup_program)
+        self.assertRaises(TypeError, train_program._copy_data_info_from,
+                          startup_program.block(0))
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_instance_norm_op.py b/python/paddle/fluid/tests/unittests/test_instance_norm_op.py
index c02e48bd715b06fcf3950881447d3189377edf05..c58c830c2c53f4c120b8fe6d26398b2811d29bd3 100644
--- a/python/paddle/fluid/tests/unittests/test_instance_norm_op.py
+++ b/python/paddle/fluid/tests/unittests/test_instance_norm_op.py
@@ -163,6 +163,8 @@ class TestInstanceNormOpTraining(unittest.TestCase):
                     grad_var = block.desc.find_var(arg.encode("ascii"))
                     grad_var.set_dtype(core.VarDesc.VarType.FP32)
 
+                program._sync_with_cpp()
+
                 exe = fluid.Executor(place)
                 out = exe.run(program,
                               feed={
diff --git a/python/paddle/fluid/tests/unittests/test_layer_norm_op.py b/python/paddle/fluid/tests/unittests/test_layer_norm_op.py
index 4bd7a98cb924a45ff60f4908627c386b20d9cbde..18e83f9a56964b1372ad199bc765577bd4847243 100644
--- a/python/paddle/fluid/tests/unittests/test_layer_norm_op.py
+++ b/python/paddle/fluid/tests/unittests/test_layer_norm_op.py
@@ -148,6 +148,8 @@ class TestLayerNormOp(unittest.TestCase):
                     grad_var = block.desc.find_var(arg.encode("ascii"))
                     grad_var.set_dtype(core.VarDesc.VarType.FP32)
 
+                program._sync_with_cpp()
+
                 exe = fluid.Executor(place)
                 out = exe.run(program,
                               feed={