From ca1185d06ba6c80792d60ab3f1ae47c4f3fcf73d Mon Sep 17 00:00:00 2001
From: Aurelius84
Date: Tue, 21 Jul 2020 10:37:18 +0800
Subject: [PATCH] [Dy2Stat] Fix scope in run_program_op (#25579)

* add reinforcement learning model test=develop

* align backward test=develop

* add gym in paddle_build.sh test=develop

* rm pip install in script test=develop

* refine paddle_build.sh test=develop

* fix sed error in macOS test=develop

* polish code test=develop

* fix scope problem

* refine code by reviewer comment
---
 paddle/fluid/operators/run_program_op.h  | 39 +++++++++++++++----
 .../test_reinforcement_learning.py       |  9 ++---
 2 files changed, 35 insertions(+), 13 deletions(-)

diff --git a/paddle/fluid/operators/run_program_op.h b/paddle/fluid/operators/run_program_op.h
index 830334043c..c0fbc336e4 100644
--- a/paddle/fluid/operators/run_program_op.h
+++ b/paddle/fluid/operators/run_program_op.h
@@ -232,10 +232,15 @@ class RunProgramOpKernel : public framework::OpKernel<T> {
 
     auto exe_ctx = exe.Prepare(*program, 0, skip_vars);
 
-    // get scope and clear old vars
-    framework::Scope &scope = *(out_scope_vec->front());
-    auto local_vars = scope.LocalVarNames();
-    scope.EraseVars(local_vars);
+    // NOTE(Aurelius84): While training some models, forward can be called
+    // many times and then backpropagation is applied all at once, such as in
+    // Reinforcement Learning. The Tensor data of each step must be saved
+    // into its own sub-scope. Otherwise, the gradients are miscalculated,
+    // because every step would reuse the Tensor data of the last forward.
+    framework::Scope *global_inner_scope = out_scope_vec->front();
+    VLOG(2) << "The number of sub scopes before forward: "
+            << out_scope_vec->front()->kids().size();
+    framework::Scope &scope = global_inner_scope->NewScope();
 
     // share input_vars & parameters into scope
     details::ShareVarsIntoScope(input_vars, input_var_names, &scope);
@@ -251,6 +256,12 @@ class RunProgramOpKernel : public framework::OpKernel<T> {
 
     // Debug info: scope info when run end
     VLOG(3) << framework::GenScopeTreeDebugInfo(out_scope_vec->front());
+    // Step 5. Drop all children scopes while testing.
+    if (is_test) {
+      out_scope_vec->front()->DropKids();
+    }
+    VLOG(2) << "The number of sub scopes after forward: "
+            << out_scope_vec->front()->kids().size();
   }
 };
 
@@ -285,8 +296,8 @@ class RunProgramGradOpKernel : public framework::OpKernel<T> {
 
     auto orig_end_op_index = ctx.Attr<int64_t>("end_op_index");
     // NOTE: skip `shape` and `fill_constant` op created by
-    // fluid.backward.gradients,
-    // one forward output will generate one `shape` and `fill_constant`
+    // fluid.backward.gradients, one forward output will generate one `shape`
+    // and `fill_constant`
     int64_t start_op_index = orig_end_op_index + (output_grad_vars.size() * 2);
     int64_t end_op_index = block->OpSize();
 
@@ -295,7 +306,16 @@ class RunProgramGradOpKernel : public framework::OpKernel<T> {
         out_scope_vec->size(), 1,
         platform::errors::InvalidArgument(
            "The OutScope of RunProgramGradOp should only hold one scope."));
-    auto &scope = *(out_scope_vec->front());
+
+    framework::Scope *global_inner_scope = out_scope_vec->front();
+    auto sub_scope_num = global_inner_scope->kids().size();
+    VLOG(2) << "The number of sub scopes before backward: " << sub_scope_num;
+    PADDLE_ENFORCE_GT(sub_scope_num, 0,
+                      platform::errors::InvalidArgument(
+                          "The OutScope of RunProgramGradOp should hold at "
+                          "least one sub scope."));
+
+    auto &scope = *(global_inner_scope->kids().front());
 
     // Step 2. prepare executor and scope
     framework::Executor exe(ctx.GetPlace());
@@ -324,6 +344,11 @@ class RunProgramGradOpKernel : public framework::OpKernel<T> {
 
     // Step 4. get outputs
     details::ShareVarsFromScope(input_grad_vars, input_grad_var_names, &scope);
     details::ShareVarsFromScope(param_grad_vars, param_grad_names, &scope);
+
+    // Step 5. drop current scope
+    global_inner_scope->DeleteScope(&scope);
+    VLOG(2) << "The number of sub scopes after backward: "
+            << global_inner_scope->kids().size();
   }
 };
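To make the new scope lifecycle concrete, the following self-contained C++ sketch mimics what the two kernels above now do: each forward call pushes a fresh child scope under the op's global inner scope, and each backward call consumes and deletes the front child. The `Scope` class here is a simplified stand-in written for this note, not Paddle's real `framework::Scope`; only the method names mirror the actual API, and the `vars` map is a toy substitute for tensor storage.

// Minimal stand-in for framework::Scope: only the NewScope()/kids()/
// DeleteScope()/DropKids() surface used by run_program_op is mimicked.
#include <iostream>
#include <list>
#include <map>
#include <memory>
#include <string>

class Scope {
 public:
  // Append a child scope, as the forward kernel does once per step.
  Scope &NewScope() {
    kids_.push_back(std::unique_ptr<Scope>(new Scope));
    return *kids_.back();
  }
  const std::list<std::unique_ptr<Scope>> &kids() const { return kids_; }
  // Release one child, as the backward kernel does after reading gradients.
  void DeleteScope(Scope *scope) {
    kids_.remove_if(
        [scope](const std::unique_ptr<Scope> &k) { return k.get() == scope; });
  }
  // Release all children, as the forward kernel does when is_test is true.
  void DropKids() { kids_.clear(); }

  std::map<std::string, float> vars;  // toy stand-in for tensor storage

 private:
  std::list<std::unique_ptr<Scope>> kids_;
};

int main() {
  Scope global_inner_scope;

  // Before this patch, every forward step reused (and erased) one shared
  // scope, so backward only ever saw the last step's tensors. Now each
  // forward step gets its own child scope:
  for (int step = 0; step < 3; ++step) {
    Scope &scope = global_inner_scope.NewScope();
    scope.vars["hidden"] = static_cast<float>(step);  // per-step data survives
  }
  std::cout << "sub scopes after 3 forward steps: "
            << global_inner_scope.kids().size() << "\n";  // prints 3

  // Backward consumes the front child and drops it, one step at a time,
  // mirroring DeleteScope(&scope) at the end of RunProgramGradOpKernel.
  while (!global_inner_scope.kids().empty()) {
    Scope &scope = *global_inner_scope.kids().front();
    std::cout << "backward sees hidden=" << scope.vars["hidden"] << "\n";
    global_inner_scope.DeleteScope(&scope);
  }
  return 0;
}

Run as-is, the sketch prints one distinct `hidden` value per step, which is exactly the per-step data the old shared-scope code would have overwritten before backward ran.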
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_reinforcement_learning.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_reinforcement_learning.py
index 2f753cd5cf..4813930159 100644
--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_reinforcement_learning.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_reinforcement_learning.py
@@ -112,7 +112,7 @@ def train(args, place, to_static):
         state = to_variable(state)
         state.stop_gradient = True
         loss_probs = policy(state)
-        # print(loss_probs.name)
+
         probs = loss_probs.numpy()
 
         action, _mask = sample_action(probs[0])
@@ -166,10 +166,8 @@ def train(args, place, to_static):
     running_reward = 10
     for i_episode in itertools.count(1):
         state, ep_reward = env.reset(), 0
-        # TODO(Aurelius84): In RL, we continuously select actions with multiple steps,
-        # then accumulate loss to apply optimization. But currently all vars shared with
-        # the same inner scope, which has problem in backward. I will fix it in next PR.
-        for t in range(1, 2):  # default 1000
+        # The default loop number is 10000 in models; we use 1000 for a smaller test.
+        for t in range(1, 1000):
             state = np.array(state).astype("float32")
             action, loss = select_action(state)
             state, reward, done, _ = env.step(action)
@@ -203,7 +201,6 @@ class TestDeclarative(unittest.TestCase):
     def setUp(self):
         self.place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() \
             else fluid.CPUPlace()
-
         self.args = Args()
 
     def test_train(self):
-- 
GitLab
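One detail of the backward kernel worth unpacking is the line `start_op_index = orig_end_op_index + (output_grad_vars.size() * 2)`: as the in-code NOTE says, `fluid.backward.gradients` emits one `shape` and one `fill_constant` op per forward output, so the real gradient ops begin 2*N ops past the end of the forward section. The sketch below illustrates that arithmetic with hypothetical numbers; the helper name and all values are made up for this note and are not Paddle API.

// Sketch of the op-index arithmetic used by RunProgramGradOpKernel above.
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

int64_t BackwardStartIndex(int64_t forward_end_op_index,
                           size_t num_output_grads) {
  // Each forward output contributes one `shape` + one `fill_constant` op.
  return forward_end_op_index + static_cast<int64_t>(num_output_grads) * 2;
}

int main() {
  // Hypothetical program layout, for illustration only: ops [0, 40) are the
  // forward section, the program has 3 outputs, and the block has 60 ops.
  const int64_t orig_end_op_index = 40;
  const std::vector<std::string> output_grad_vars = {"x@GRAD", "y@GRAD",
                                                     "z@GRAD"};
  const int64_t block_op_size = 60;

  const int64_t start_op_index =
      BackwardStartIndex(orig_end_op_index, output_grad_vars.size());
  std::cout << "ops [" << orig_end_op_index << ", " << start_op_index
            << ") are skipped: 3 `shape` + 3 `fill_constant`\n";
  std::cout << "backward executes ops [" << start_op_index << ", "
            << block_op_size << ")\n";
  return 0;
}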