From ca1185d06ba6c80792d60ab3f1ae47c4f3fcf73d Mon Sep 17 00:00:00 2001
From: Aurelius84
Date: Tue, 21 Jul 2020 10:37:18 +0800
Subject: [PATCH] [Dy2Stat] Fix scope in run_program_op (#25579)

* add reinforcement learning model test=develop
* align backward test=develop
* add gym in paddle_build.sh test=develop
* rm pip install in script test=develop
* refine paddle_build.sh test=develop
* fix sed error in macOS test=develop
* polish code test=develop
* fix scope problem
* refine code by reviewer comment
---
 paddle/fluid/operators/run_program_op.h       | 39 +++++++++++++++----
 .../test_reinforcement_learning.py            |  9 ++---
 2 files changed, 35 insertions(+), 13 deletions(-)

diff --git a/paddle/fluid/operators/run_program_op.h b/paddle/fluid/operators/run_program_op.h
index 830334043c4..c0fbc336e46 100644
--- a/paddle/fluid/operators/run_program_op.h
+++ b/paddle/fluid/operators/run_program_op.h
@@ -232,10 +232,15 @@ class RunProgramOpKernel : public framework::OpKernel<T> {
 
     auto exe_ctx = exe.Prepare(*program, 0, skip_vars);
 
-    // get scope and clear old vars
-    framework::Scope &scope = *(out_scope_vec->front());
-    auto local_vars = scope.LocalVarNames();
-    scope.EraseVars(local_vars);
+    // NOTE(Aurelius84): In some models, such as Reinforcement Learning, the
+    // forward pass is called many times before backpropagation is applied
+    // once. The Tensor data of each step in such multi-step training must be
+    // saved into its own scope; otherwise, gradients are miscalculated
+    // because the Tensor data of the last forward step would always be used.
+    framework::Scope *global_inner_scope = out_scope_vec->front();
+    VLOG(2) << "The number of sub scopes before forward: "
+            << out_scope_vec->front()->kids().size();
+    framework::Scope &scope = global_inner_scope->NewScope();
 
     // share input_vars & parameters into scope
     details::ShareVarsIntoScope(input_vars, input_var_names, &scope);
@@ -251,6 +256,12 @@ class RunProgramOpKernel : public framework::OpKernel<T> {
 
     // Debug info: scope info when run end
     VLOG(3) << framework::GenScopeTreeDebugInfo(out_scope_vec->front());
+
+    // Step 5. Drop all child scopes while testing.
+    if (is_test) {
+      out_scope_vec->front()->DropKids();
+    }
+    VLOG(2) << "The number of sub scopes after forward: "
+            << out_scope_vec->front()->kids().size();
   }
 };
 
@@ -285,8 +296,8 @@ class RunProgramGradOpKernel : public framework::OpKernel<T> {
     auto orig_end_op_index = ctx.Attr<int64_t>("end_op_index");
 
     // NOTE: skip `shape` and `fill_constant` op created by
-    // fluid.backward.gradients,
-    // one forward output will generate one `shape` and `fill_constant`
+    // fluid.backward.gradients, one forward output will generate one `shape`
+    // and `fill_constant`
     int64_t start_op_index = orig_end_op_index + (output_grad_vars.size() * 2);
     int64_t end_op_index = block->OpSize();
 
@@ -295,7 +306,16 @@ class RunProgramGradOpKernel : public framework::OpKernel<T> {
         out_scope_vec->size(), 1,
         platform::errors::InvalidArgument(
             "The OutScope of RunProgramGradOp should only hold one scope."));
-    auto &scope = *(out_scope_vec->front());
+
+    framework::Scope *global_inner_scope = out_scope_vec->front();
+    auto sub_scope_num = global_inner_scope->kids().size();
+    VLOG(2) << "The number of sub scopes before backward: " << sub_scope_num;
+    PADDLE_ENFORCE_GT(sub_scope_num, 0,
+                      platform::errors::InvalidArgument(
+                          "The OutScope of RunProgramGradOp should hold at "
+                          "least one sub scope."));
+
+    auto &scope = *(global_inner_scope->kids().front());
 
     // Step 2. prepare executor and scope
     framework::Executor exe(ctx.GetPlace());
 
@@ -324,6 +344,11 @@ class RunProgramGradOpKernel : public framework::OpKernel<T> {
     // Step 4. get outputs
     details::ShareVarsFromScope(input_grad_vars, input_grad_var_names, &scope);
     details::ShareVarsFromScope(param_grad_vars, param_grad_names, &scope);
+
+    // Step 5. drop the current step's scope
+    global_inner_scope->DeleteScope(&scope);
+    VLOG(2) << "The number of sub scopes after backward: "
+            << global_inner_scope->kids().size();
   }
 };
 
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_reinforcement_learning.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_reinforcement_learning.py
index 2f753cd5cfc..48139301597 100644
--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_reinforcement_learning.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_reinforcement_learning.py
@@ -112,7 +112,7 @@ def train(args, place, to_static):
             state = to_variable(state)
             state.stop_gradient = True
             loss_probs = policy(state)
-            # print(loss_probs.name)
+
             probs = loss_probs.numpy()
 
             action, _mask = sample_action(probs[0])
@@ -166,10 +166,8 @@ def train(args, place, to_static):
        running_reward = 10
        for i_episode in itertools.count(1):
            state, ep_reward = env.reset(), 0
-            # TODO(Aurelius84): In RL, we continuously select actions with multiple steps,
-            # then accumulate loss to apply optimization. But currently all vars shared with
-            # the same inner scope, which has problem in backward. I will fix it in next PR.
-            for t in range(1, 2):  # default 1000
+            # The default loop count is 10000 in the original model; we use 1000 for a smaller test.
+            for t in range(1, 1000):
                state = np.array(state).astype("float32")
                action, loss = select_action(state)
                state, reward, done, _ = env.step(action)
@@ -203,7 +201,6 @@ class TestDeclarative(unittest.TestCase):
     def setUp(self):
         self.place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() \
             else fluid.CPUPlace()
-
         self.args = Args()
 
     def test_train(self):
--
GitLab
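
Note: the sketch below is illustrative only. It mocks the handful of Scope operations the patch relies on (NewScope, kids, DropKids, DeleteScope) instead of using the real paddle::framework::Scope, to show why giving each forward step its own sub scope keeps multi-step gradients correct. All names here are hypothetical stand-ins for the Paddle internals.

#include <iostream>
#include <list>

// Minimal mock of the parent/child scope ownership behavior the patch
// depends on. NOT the real paddle::framework::Scope.
class Scope {
 public:
  ~Scope() { DropKids(); }

  // Forward kernel: each step gets its own child scope.
  Scope& NewScope() {
    kids_.push_back(new Scope);
    return *kids_.back();
  }

  const std::list<Scope*>& kids() const { return kids_; }

  // Backward kernel: the consumed child scope is deleted after use.
  void DeleteScope(Scope* kid) {
    kids_.remove(kid);
    delete kid;
  }

  // Forward kernel under is_test: no backward will run, so drop everything.
  void DropKids() {
    for (Scope* kid : kids_) delete kid;
    kids_.clear();
  }

 private:
  std::list<Scope*> kids_;
};

int main() {
  Scope global_inner_scope;

  // Multi-step forward, e.g. an RL rollout: the tensors of each step live in
  // a separate sub scope instead of overwriting one shared scope.
  for (int step = 0; step < 3; ++step) {
    Scope& step_scope = global_inner_scope.NewScope();
    (void)step_scope;  // the forward program block would run in step_scope
  }
  std::cout << "sub scopes after forward: "
            << global_inner_scope.kids().size() << std::endl;  // prints 3

  // Backward: each call takes the front child scope, computes gradients from
  // that step's own tensors, then deletes the scope.
  while (!global_inner_scope.kids().empty()) {
    Scope* step_scope = global_inner_scope.kids().front();
    // the gradient program block would run in *step_scope here
    global_inner_scope.DeleteScope(step_scope);
  }
  std::cout << "sub scopes after backward: "
            << global_inner_scope.kids().size() << std::endl;  // prints 0
  return 0;
}

Under the old code, every forward step erased and reused the same scope, so by the time backward ran only the last step's tensors survived; with per-step sub scopes, backward consumes one child scope per call and frees it afterwards.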