From 655f76d26394e006e01248606596cc71010c7b31 Mon Sep 17 00:00:00 2001
From: Weilong Wu
Date: Thu, 20 Jan 2022 17:13:22 +0800
Subject: [PATCH] Modify Code AutoGen logics and Support test_imperative decorator and layer_children, layer_trainable (#38633)

* Rearranged Eager AutoCodeGen directory structure
* Removed USE_OP in Eager AutoCodeGen
* Enabled generation for Operators without Grad/Inputs/Outputs
* Resolved operators without input
* Fixed merge conflicts
* Enabled Eager AutoCodeGen for 10+ more operators
* Refactored Eager AutoCodeGen with more organized helper objects
* Enabled Eager AutoCodeGen for operators with multiple OpBases
* Adjusted Eager AutoCodeGen to Enable Passing Output Tensor as Input Argument
* Handled Dispensable Inputs/Outputs in Eager AutoCodeGen
* Adjusted function generation/call between Python-C API & Dygraph API
* Synchronized auto-generated Python-C API with Dygraph Forward Functions
* support more eager tensor api
* fix merge compile error
* fix compile error and fit develop code
* support pure CPU
* fix some logic error in eager_mode
* support _varbase_creator in eager mode
* Added safe_initialized interface to EagerTensor for use in processing dispensable inputs
* for eager mode
* refine
* support multiple constructor for eager tensor
* add place related code
* polish code
* specific randint with dtype of int64
* Support pure cpu test
* eager logic
* refine test in pure cpu
* eager logic
* eager logic
* eager logic, test=develop
* skip core.eager when in inference, test=develop
* refine, test=develop
* refine, test=develop
* call RetainGrad after run forward kernel, test=develop
* refine, test=develop
* support dygraph util, meta, guard test
* eager test case
* support inference test
* refine test and fix initializer failed
* modify eagertensor patch method
* add eagertensor.clear_grandint, test=develop
* refine, test=develop
* refine, test=develop
* refine, test=develop
* support create varbase and fix retain grad error
* call monkey_patch_varbase in _test_eager_guard, test=develop
* fix windows error
* split clear_gradient to clear_gradient and zero_grads, test=develop
* refine, test=develop
* refine, test=develop
* support test_imperative_basic test in eager mode
* remove additional log in variable.h
* remove additional log in variable.h
* remove additional code create in merge
* eager
* fix some eager logic, test=develop
* refine, test=develop
* refine, test=develop
* Support test_imperative decorator and layer_children, layer_trainable
* Compare ori_dygraph and new_egr
* refine, test=develop
* patch_tensor_method_func, test=develop
* refine, test=develop
* eager test case, test=develop
* refine, test=develop
* Updated assert_equal func
* eager, test=develop
* Updated assert statement
* eager, test=develop
* eager optimizer, test=develop
* eager optimizer, test=develop
* eager test_imperative_optimizer_v2, test=develop
* eager, test=develop
* refine, test=develop
* refine, test=develop
* eager, test=develop
* add resize in share buffer to, test=develop
* eager, test=develop
* fix _share_buffer_to, test=develop
* refine, test=develop
* refine, test=develop
* support eager for dataloader,test=develop
* Modified eager_generator logic to use ptr
* Updated eager_generator logic

Co-authored-by: jim19930609
Co-authored-by: JiabinYang <360788950@qq.com>
Co-authored-by: Wang Huan
Co-authored-by: wanghuancoder
---
 .../eager_generated/forwards/scale.cc         |  4 +-
 .../auto_code_generator/eager_generator.cc    | 51 ++++++++++++-------
 paddle/fluid/eager/grad_node_info.cc          | 14 ++---
 paddle/fluid/eager/grad_node_info.h           |  4 +-
 .../grad_node_info_test.cc                    |  4 +-
 .../unittests/test_imperative_decorator.py    | 23 ++++++---
 .../test_imperative_layer_children.py         | 17 +++++--
 .../test_imperative_layer_trainable.py        |  8 ++-
 8 files changed, 84 insertions(+), 41 deletions(-)
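The diffs below move the dygraph autograd plumbing from `const egr::AutogradMeta&` parameters to raw `egr::AutogradMeta*`. The motivation is the dispensable-input case: `EagerUtils::nullable_autograd_meta` can legitimately yield no meta, and the previously generated code dereferenced that result before passing it on. A self-contained sketch of the idea, using simplified stand-in types rather than the real `egr::` classes (`NullableAutogradMeta` and `MutableAutogradMeta` are hypothetical analogues):

```cpp
// Hedged sketch: simplified stand-ins, not the real Paddle eager API.
// A dispensable input may carry no autograd information, so the "nullable"
// lookup can return nullptr and downstream setters must be able to accept it.
#include <iostream>
#include <memory>

struct AutogradMeta {
  bool stop_gradient = false;
};

struct Tensor {
  // Meta is optional here; a dispensable/uninitialized input has none.
  std::shared_ptr<AutogradMeta> meta;
};

// Analogue of EagerUtils::nullable_autograd_meta: may return nullptr.
AutogradMeta* NullableAutogradMeta(const Tensor& t) {
  return t.meta ? t.meta.get() : nullptr;
}

// Analogue of EagerUtils::autograd_meta: creates the meta on demand.
AutogradMeta* MutableAutogradMeta(Tensor* t) {
  if (!t->meta) t->meta = std::make_shared<AutogradMeta>();
  return t->meta.get();
}

int main() {
  Tensor x;    // dispensable input without autograd meta
  Tensor out;  // forward output

  AutogradMeta* in_meta = NullableAutogradMeta(x);     // nullptr
  AutogradMeta* out_meta = MutableAutogradMeta(&out);  // always valid

  // With reference parameters the caller had to write *in_meta, which is
  // undefined behaviour when in_meta is nullptr; the pointer form defers
  // the decision to the callee (see the SetGradOutMeta sketch further down).
  std::cout << std::boolalpha << (in_meta == nullptr) << " "
            << (out_meta != nullptr) << std::endl;  // prints: true true
  return 0;
}
```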
diff --git a/paddle/fluid/eager/api/generated/eager_generated/forwards/scale.cc b/paddle/fluid/eager/api/generated/eager_generated/forwards/scale.cc
index 642302a411..6390f06640 100644
--- a/paddle/fluid/eager/api/generated/eager_generated/forwards/scale.cc
+++ b/paddle/fluid/eager/api/generated/eager_generated/forwards/scale.cc
@@ -85,9 +85,9 @@ egr::EagerTensor scale(const egr::EagerTensor& x, float scale, float bias,
   scale_node->SetTensorWrappers_X({x});
 
   // Set Grad out rank as same as fwd input and set stop gradient to bwd
-  scale_node->SetGradOutMeta(*p_autograd_in, /*slot id*/ 0);
+  scale_node->SetGradOutMeta(p_autograd_in, /*slot id*/ 0);
   // Set Grad out rank as same as fwd input and set stop gradient to bwd
-  scale_node->SetGradInMeta(*p_autograd_out, /*slot id*/ 0);
+  scale_node->SetGradInMeta(p_autograd_out, /*slot id*/ 0);
 
   // Set History for output set current Grad Node for
   EagerUtils::SetHistory(p_autograd_out, scale_node);
diff --git a/paddle/fluid/eager/auto_code_generator/eager_generator.cc b/paddle/fluid/eager/auto_code_generator/eager_generator.cc
index 11e033e1e5..3ffe02e2bc 100644
--- a/paddle/fluid/eager/auto_code_generator/eager_generator.cc
+++ b/paddle/fluid/eager/auto_code_generator/eager_generator.cc
@@ -990,8 +990,8 @@ static std::string GenerateGradNodeCreationContent(
     } else {
       const char* GET_SINGLE_AUTOGRAD_META_TEMPLATE =
-          " egr::AutogradMeta& %s = "
-          "*egr::EagerUtils::nullable_autograd_meta(%s);\n";
+          " egr::AutogradMeta* %s = "
+          "egr::EagerUtils::nullable_autograd_meta(%s);\n";
       get_autograd_meta_str += paddle::string::Sprintf(
           GET_SINGLE_AUTOGRAD_META_TEMPLATE, input_autograd_name, input_name);
     }
@@ -1014,8 +1014,8 @@ static std::string GenerateGradNodeCreationContent(
     } else {
       const char* GET_SINGLE_AUTOGRAD_META_TEMPLATE =
-          " egr::AutogradMeta& %s = "
-          "*egr::EagerUtils::autograd_meta(&%s);\n";
+          " egr::AutogradMeta* %s = "
+          "egr::EagerUtils::autograd_meta(&%s);\n";
       get_autograd_meta_str += paddle::string::Sprintf(
           GET_SINGLE_AUTOGRAD_META_TEMPLATE, output_autograd_name, output_name);
     }
@@ -1082,15 +1082,14 @@ static std::string GenerateGradNodeCreationContent(
     const std::string& input_name = input.name();
     const std::string& input_autograd_name = "p_autograd_" + input_name;
 
-    if (input.dispensable() && !input.duplicable()) {
+    if (!input.duplicable()) {
       compute_require_grad_args += ", " + input_autograd_name;
       size_t input_position = fwd_inputs_name_pos_map.at(input_name);
 
       const char* SET_GRAD_OUT_META_TEMPLATE =
-          " if(%s) grad_node->SetGradOutMeta(*%s, %d);\n";
+          " grad_node->SetGradOutMeta(%s, %d);\n";
       grad_node_creation_str += paddle::string::Sprintf(
-          SET_GRAD_OUT_META_TEMPLATE, input_autograd_name, input_autograd_name,
-          input_position);
+          SET_GRAD_OUT_META_TEMPLATE, input_autograd_name, input_position);
 
       const char* ADD_EDGES_TEMPLATE =
           " if(%s) grad_node->AddEdges(%s, %d);\n";
@@ -1119,24 +1118,38 @@ static std::string GenerateGradNodeCreationContent(
   for (const proto::OpProto::Var& output : out_vars) {
     const std::string& output_name = output.name();
     const std::string& output_autograd_name = "p_autograd_" + output_name;
-    pass_stop_gradient_args += ", &" + output_autograd_name;
     size_t output_position = fwd_outputs_name_pos_map.at(output_name);
 
+    if (output.duplicable()) {
+      pass_stop_gradient_args += ", &" + output_autograd_name;
+      const char* SET_OUT_RANK_TEMPLATE =
+          " egr::EagerUtils::SetOutRankWithSlot(&%s, %d);\n";
+      grad_node_creation_str += paddle::string::Sprintf(
+          SET_OUT_RANK_TEMPLATE, output_autograd_name, output_position);
+
+      const char* SET_HISTORY_TEMPLATE =
+          " egr::EagerUtils::SetHistory(&%s, grad_node);\n";
+      grad_node_creation_str +=
+          paddle::string::Sprintf(SET_HISTORY_TEMPLATE, output_autograd_name);
+
+    } else {
+      pass_stop_gradient_args += ", " + output_autograd_name;
+      const char* SET_OUT_RANK_TEMPLATE =
+          " egr::EagerUtils::SetOutRankWithSlot(%s, %d);\n";
+      grad_node_creation_str += paddle::string::Sprintf(
+          SET_OUT_RANK_TEMPLATE, output_autograd_name, output_position);
+
+      const char* SET_HISTORY_TEMPLATE =
+          " egr::EagerUtils::SetHistory(%s, grad_node);\n";
+      grad_node_creation_str +=
+          paddle::string::Sprintf(SET_HISTORY_TEMPLATE, output_autograd_name);
+    }
+
     const char* SET_GRAD_IN_META_TEMPLATE =
         " grad_node->SetGradInMeta(%s, %d);\n";
     grad_node_creation_str += paddle::string::Sprintf(
         SET_GRAD_IN_META_TEMPLATE, output_autograd_name, output_position);
 
-    const char* SET_OUT_RANK_TEMPLATE =
-        " egr::EagerUtils::SetOutRankWithSlot(&%s, %d);\n";
-    grad_node_creation_str += paddle::string::Sprintf(
-        SET_OUT_RANK_TEMPLATE, output_autograd_name, output_position);
-
-    const char* SET_HISTORY_TEMPLATE =
-        " egr::EagerUtils::SetHistory(&%s, grad_node);\n";
-    grad_node_creation_str +=
-        paddle::string::Sprintf(SET_HISTORY_TEMPLATE, output_autograd_name);
-
     VLOG(6) << "Generated Call RetainGradForTensor";
     const char* RETAIN_GRAD_TEMPLATE =
         " egr::EagerUtils::CheckAndRetainGrad(%s);\n";
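In the generator hunk above, outputs are now split by `output.duplicable()`: duplicable (vector) outputs keep the address-of (`&%s`) templates, while single outputs emit the new pointer-based `SetOutRankWithSlot`/`SetHistory` calls. A minimal sketch of that template selection, assuming a hypothetical `EmitOutputGradMetaCode` helper and plain `snprintf` in place of `paddle::string::Sprintf`:

```cpp
// Hedged sketch of the duplicable/non-duplicable branching; not the real
// eager_generator, which works from proto::OpProto metadata.
#include <cstdio>
#include <iostream>
#include <string>

std::string EmitOutputGradMetaCode(const std::string& autograd_name,
                                   int slot_position, bool duplicable) {
  // Duplicable outputs are vectors, so the generated call takes the address;
  // single outputs are already AutogradMeta* after this patch.
  const char* set_out_rank_tpl =
      duplicable ? "  egr::EagerUtils::SetOutRankWithSlot(&%s, %d);\n"
                 : "  egr::EagerUtils::SetOutRankWithSlot(%s, %d);\n";
  const char* set_history_tpl =
      duplicable ? "  egr::EagerUtils::SetHistory(&%s, grad_node);\n"
                 : "  egr::EagerUtils::SetHistory(%s, grad_node);\n";

  char buf[512];
  int n = std::snprintf(buf, sizeof(buf), set_out_rank_tpl,
                        autograd_name.c_str(), slot_position);
  std::snprintf(buf + n, sizeof(buf) - n, set_history_tpl,
                autograd_name.c_str());
  return buf;
}

int main() {
  // A single (non-duplicable) output gets the pointer form.
  std::cout << EmitOutputGradMetaCode("p_autograd_Out", 0, /*duplicable=*/false);
  // A duplicable output keeps the &-of form.
  std::cout << EmitOutputGradMetaCode("p_autograd_Outs", 1, /*duplicable=*/true);
  return 0;
}
```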
diff --git a/paddle/fluid/eager/grad_node_info.cc b/paddle/fluid/eager/grad_node_info.cc
index 49bd416d46..e3649726cf 100644
--- a/paddle/fluid/eager/grad_node_info.cc
+++ b/paddle/fluid/eager/grad_node_info.cc
@@ -121,8 +121,7 @@ void GradNodeBase::SetGradInMeta(const std::vector& fwd_out,
   }
 }
 
-void GradNodeBase::SetGradInMeta(const AutogradMeta& fwd_out,
-                                 size_t slot_rank) {
+void GradNodeBase::SetGradInMeta(AutogradMeta* fwd_out, size_t slot_rank) {
   PADDLE_ENFORCE_LE(
       slot_rank, (bwd_in_meta_.size() - 1),
       paddle::platform::errors::InvalidArgument(
@@ -138,7 +137,7 @@ void GradNodeBase::SetGradInMeta(const AutogradMeta& fwd_out,
   // Init stop gradient vector before use to avoid push back
   VLOG(7) << "Init bwd_in_meta_ with slot rank: " << slot_rank;
   meta.Init(1);
-  meta.SetStopGradient(0, fwd_out.StopGradient());
+  meta.SetStopGradient(0, fwd_out->StopGradient());
 }
 
 void GradNodeBase::SetGradOutMeta(const std::vector& fwd_in,
@@ -171,8 +170,7 @@ void GradNodeBase::SetGradOutMeta(const std::vector& fwd_in,
   }
 }
 
-void GradNodeBase::SetGradOutMeta(const AutogradMeta& fwd_in,
-                                  size_t slot_rank) {
+void GradNodeBase::SetGradOutMeta(AutogradMeta* fwd_in, size_t slot_rank) {
   PADDLE_ENFORCE_LE(
       (slot_rank + 1), bwd_out_meta_.size(),
       paddle::platform::errors::InvalidArgument(
@@ -187,7 +185,11 @@ void GradNodeBase::SetGradOutMeta(const AutogradMeta& fwd_in,
       "error, it indicates bugs in framework."));
   // Init stop gradient vector before use to avoid push back
   meta.Init(1);
-  meta.SetStopGradient(0, fwd_in.StopGradient());
+  if (fwd_in) {
+    meta.SetStopGradient(0, fwd_in->StopGradient());
+  } else {
+    meta.SetStopGradient(0, true);
+  }
 }
 
 void GradNodeBase::SetDefaultGradInOutMeta() {
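With the pointer overloads above, the callee decides what a missing forward input means: `SetGradOutMeta(nullptr, slot)` simply marks that grad output as stop-gradient. A trimmed-down, self-contained sketch of that callee-side behaviour (simplified stand-ins, not the real `GradNodeBase`):

```cpp
// Hedged sketch: a simplified GradNodeBase-like class showing the null-safe
// stop-gradient handling this patch introduces.
#include <iostream>
#include <vector>

struct AutogradMeta {
  bool stop_gradient = false;
  bool StopGradient() const { return stop_gradient; }
};

struct GradSlotMeta {
  std::vector<bool> stop_gradient;
  void Init(size_t n) { stop_gradient.assign(n, false); }
  void SetStopGradient(size_t i, bool v) { stop_gradient[i] = v; }
};

class GradNodeBase {
 public:
  explicit GradNodeBase(size_t bwd_out_slots) : bwd_out_meta_(bwd_out_slots) {}

  // nullptr means the forward input was dispensable and absent, so the
  // corresponding grad output is marked stop-gradient.
  void SetGradOutMeta(AutogradMeta* fwd_in, size_t slot_rank) {
    GradSlotMeta& meta = bwd_out_meta_[slot_rank];
    meta.Init(1);
    meta.SetStopGradient(0, fwd_in ? fwd_in->StopGradient() : true);
  }

  const std::vector<GradSlotMeta>& OutputMeta() const { return bwd_out_meta_; }

 private:
  std::vector<GradSlotMeta> bwd_out_meta_;
};

int main() {
  GradNodeBase node(/*bwd_out_slots=*/2);

  AutogradMeta present;             // a regular input with autograd meta
  node.SetGradOutMeta(&present, 0);
  node.SetGradOutMeta(nullptr, 1);  // dispensable input that was not passed

  std::cout << std::boolalpha
            << node.OutputMeta()[0].stop_gradient[0] << " "          // false
            << node.OutputMeta()[1].stop_gradient[0] << std::endl;   // true
  return 0;
}
```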
diff --git a/paddle/fluid/eager/grad_node_info.h b/paddle/fluid/eager/grad_node_info.h
index 5cf0b90220..2581096387 100644
--- a/paddle/fluid/eager/grad_node_info.h
+++ b/paddle/fluid/eager/grad_node_info.h
@@ -123,11 +123,11 @@ class GradNodeBase {
   void SetGradInMeta(const std::vector& fwd_out, size_t slot_rank);
 
-  void SetGradInMeta(const AutogradMeta& fwd_out, size_t slot_rank);
+  void SetGradInMeta(AutogradMeta* fwd_out, size_t slot_rank);
 
   void SetGradOutMeta(const std::vector& fwd_in, size_t slot_rank);
 
-  void SetGradOutMeta(const AutogradMeta& fwd_in, size_t slot_rank);
+  void SetGradOutMeta(AutogradMeta* fwd_in, size_t slot_rank);
 
   /**
    * Default setters for Grad in/out meta this should be used for same special
diff --git a/paddle/fluid/eager/tests/data_structure_tests/grad_node_info_test.cc b/paddle/fluid/eager/tests/data_structure_tests/grad_node_info_test.cc
index 7f6609b88a..19850b3210 100644
--- a/paddle/fluid/eager/tests/data_structure_tests/grad_node_info_test.cc
+++ b/paddle/fluid/eager/tests/data_structure_tests/grad_node_info_test.cc
@@ -76,9 +76,9 @@ TEST(GradNodeInfo, GradNodeBase) {
   VLOG(6) << "Test Set Meta and Get Meta";
   auto_grad1->SetStopGradient(true);
   grad_test_node0->SetGradInMeta(metas, 0);
-  grad_test_node0->SetGradInMeta(*auto_grad1.get(), 1);
+  grad_test_node0->SetGradInMeta(auto_grad1.get(), 1);
   grad_test_node0->SetGradOutMeta(metas, 0);
-  grad_test_node0->SetGradOutMeta(*auto_grad1.get(), 1);
+  grad_test_node0->SetGradOutMeta(auto_grad1.get(), 1);
   CHECK_EQ(grad_test_node0->InputMeta()[0].Size(), 1);
   CHECK_EQ(grad_test_node0->InputMeta()[1].Size(), 1);
   CHECK(grad_test_node0->OutputMeta()[0].IsStopGradient(0));
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_decorator.py b/python/paddle/fluid/tests/unittests/test_imperative_decorator.py
index 6f86a0c0d6..376b0e00bb 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_decorator.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_decorator.py
@@ -19,6 +19,7 @@ import unittest
 import inspect
 
 from test_imperative_base import new_program_scope
+from paddle.fluid.framework import _test_eager_guard
 
 
 class TestTracerMode(unittest.TestCase):
@@ -45,7 +46,7 @@ class TestTracerMode(unittest.TestCase):
         finally:
             self.assertEqual(rlt, ans)
 
-    def test_main(self):
+    def func_main(self):
         with fluid.dygraph.guard():
             self.tracer = framework._dygraph_tracer()
             self.tracer._train_mode = self.init_mode
@@ -58,8 +59,8 @@ class TestTracerMode(unittest.TestCase):
             decorated_func = fluid.dygraph.no_grad(need_no_grad_func)
             self.assertTrue(
-                str(inspect.getargspec(decorated_func)) ==
-                str(inspect.getargspec(need_no_grad_func)))
+                str(inspect.getfullargspec(decorated_func)) ==
+                str(inspect.getfullargspec(need_no_grad_func)))
 
             self.assertEqual(self.tracer._train_mode, self.init_mode)
 
@@ -70,6 +71,11 @@ class TestTracerMode(unittest.TestCase):
         with new_program_scope():
             self.check_not_support_rlt(True)
 
+    def test_main(self):
+        with _test_eager_guard():
+            self.func_main()
+        self.func_main()
+
 
 class TestTracerMode2(TestTracerMode):
     def setUp(self):
@@ -83,7 +89,7 @@ class TestNoGradClass(unittest.TestCase):
         self.assertEqual(self.tracer._has_grad, False)
         return a
 
-    def test_main(self):
+    def func_main(self):
         paddle.disable_static()
 
         self.tracer = framework._dygraph_tracer()
@@ -97,8 +103,8 @@ class TestNoGradClass(unittest.TestCase):
         decorated_func = paddle.no_grad()(need_no_grad_func)
         self.assertEqual(
-            str(inspect.getargspec(decorated_func)),
-            str(inspect.getargspec(need_no_grad_func)))
+            str(inspect.getfullargspec(decorated_func)),
+            str(inspect.getfullargspec(need_no_grad_func)))
 
         def test_gen():
             for i in range(3):
@@ -119,6 +125,11 @@ class TestNoGradClass(unittest.TestCase):
 
         self.assertEqual(a, b)
 
+    def test_main(self):
+        with _test_eager_guard():
+            self.func_main()
+        self.func_main()
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_layer_children.py b/python/paddle/fluid/tests/unittests/test_imperative_layer_children.py
index 870d48f2fb..0cce1efd1f 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_layer_children.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_layer_children.py
@@ -21,6 +21,7 @@ import paddle.nn as nn
 import paddle.fluid as fluid
 
 import numpy as np
+from paddle.fluid.framework import _test_eager_guard
 
 
 class LeNetDygraph(fluid.dygraph.Layer):
@@ -38,12 +39,11 @@ class LeNetDygraph(fluid.dygraph.Layer):
 
     def forward(self, inputs):
         x = self.features(inputs)
-
         return x
 
 
 class TestLayerChildren(unittest.TestCase):
-    def test_apply_init_weight(self):
+    def func_apply_init_weight(self):
         with fluid.dygraph.guard():
             net = LeNetDygraph()
             net.eval()
@@ -52,11 +52,22 @@ class TestLayerChildren(unittest.TestCase):
             net_layers.eval()
 
             x = paddle.rand([2, 1, 28, 28])
-
             y1 = net(x)
             y2 = net_layers(x)
 
             np.testing.assert_allclose(y1.numpy(), y2.numpy())
+            return y1, y2
+
+    def test_func_apply_init_weight(self):
+        with _test_eager_guard():
+            paddle.seed(102)
+            self.new_y1, self.new_y2 = self.func_apply_init_weight()
+        paddle.seed(102)
+        self.ori_y1, self.ori_y2 = self.func_apply_init_weight()
+
+        # compare ori dygraph and new egr
+        assert np.array_equal(self.ori_y1.numpy(), self.new_y1.numpy())
+        assert np.array_equal(self.ori_y2.numpy(), self.new_y2.numpy())
 
 
 if __name__ == '__main__':
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_layer_trainable.py b/python/paddle/fluid/tests/unittests/test_imperative_layer_trainable.py
index 909b1be0f7..b0dcfd653f 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_layer_trainable.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_layer_trainable.py
@@ -17,10 +17,11 @@ import paddle.fluid as fluid
 
 import numpy as np
 import paddle.fluid.dygraph as dygraph
+from paddle.fluid.framework import _test_eager_guard
 
 
 class TestImperativeLayerTrainable(unittest.TestCase):
-    def test_set_trainable(self):
+    def func_set_trainable(self):
         with fluid.dygraph.guard():
             label = np.random.uniform(-1, 1, [10, 10]).astype(np.float32)
 
@@ -42,6 +43,11 @@ class TestImperativeLayerTrainable(unittest.TestCase):
         with self.assertRaises(ValueError):
             linear.weight.trainable = "1"
 
+    def test_set_trainable(self):
+        with _test_eager_guard():
+            self.func_set_trainable()
+        self.func_set_trainable()
+
 
 if __name__ == '__main__':
     unittest.main()
-- 
GitLab