Unverified commit ca1185d0, authored by Aurelius84, committed by GitHub

[Dy2Stat] Fix scope in run_program_op (#25579)

* add reinforcement learning model test=develop

* align backward test=develop

* add gym in paddle_build.sh test=develop

* rm pip install in script test=develop

* refine paddle_build.sh test=develop

* fix sed error in macOS test=develop

* polish code test=develop

* fix scope problem

* refine code by reviewer comment
Parent commit: 2092660c
@@ -232,10 +232,15 @@ class RunProgramOpKernel : public framework::OpKernel<T> {
auto exe_ctx = exe.Prepare(*program, 0, skip_vars);
// get scope and clear old vars
framework::Scope &scope = *(out_scope_vec->front());
auto local_vars = scope.LocalVarNames();
scope.EraseVars(local_vars);
// NOTE(Aurelius84): While training some models, forward can be called many
// times before backpropagation is applied all at once, such as in
// Reinforcement Learning. The Tensor data of each step in multi-step training
// should be saved into its own scope. Otherwise, the gradients can be
// miscalculated, because backward would always use the Tensor data of the
// last forward step.
framework::Scope *global_inner_scope = out_scope_vec->front();
VLOG(2) << "The number of sub scopes before forward: "
<< out_scope_vec->front()->kids().size();
framework::Scope &scope = global_inner_scope->NewScope();
// share input_vars & parameters into scope
details::ShareVarsIntoScope(input_vars, input_var_names, &scope);
@@ -251,6 +256,12 @@ class RunProgramOpKernel : public framework::OpKernel<T> {
// Debug info: scope info when run end
VLOG(3) << framework::GenScopeTreeDebugInfo(out_scope_vec->front());
// Step 5. Drop all child scopes while testing.
if (is_test) {
out_scope_vec->front()->DropKids();
}
VLOG(2) << "The number of sub scopes after forward: "
<< out_scope_vec->front()->kids().size();
}
};
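The forward-side change above is easier to see with a toy model. The sketch below is a stand-alone mock, not the real framework::Scope: it only mirrors the NewScope/DropKids calls from this diff to show why one child scope per forward call keeps each step's tensors alive until backward.

// Toy model only (names mirror the diff; this is NOT framework::Scope).
#include <iostream>
#include <list>
#include <map>
#include <string>

struct Scope {
  std::map<std::string, float> vars;  // simplified stand-in for Variables
  std::list<Scope *> kids;

  Scope *NewScope() {  // like framework::Scope::NewScope()
    kids.push_back(new Scope());
    return kids.back();
  }
  void DropKids() {  // like framework::Scope::DropKids()
    for (Scope *k : kids) delete k;
    kids.clear();
  }
  ~Scope() { DropKids(); }
};

int main() {
  Scope global_inner_scope;
  const bool is_test = false;

  // Forward runs several times before any backward (e.g. an RL roll-out).
  // Each call writes its outputs into its own child scope, so step 0's data
  // is no longer overwritten by step 2's.
  for (int step = 0; step < 3; ++step) {
    Scope *step_scope = global_inner_scope.NewScope();
    step_scope->vars["out"] = static_cast<float>(step);
    if (is_test) {
      global_inner_scope.DropKids();  // inference: no backward, free at once
    }
  }
  std::cout << "sub scopes after forward: " << global_inner_scope.kids.size()
            << std::endl;  // prints 3
  return 0;
}

With the old single-scope approach (EraseVars on every call), step 2's write would replace step 0's, and backward would always read the last step's tensors.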
@@ -285,8 +296,8 @@ class RunProgramGradOpKernel : public framework::OpKernel<T> {
auto orig_end_op_index = ctx.Attr<int64_t>("end_op_index");
// NOTE: skip `shape` and `fill_constant` op created by
// fluid.backward.gradients,
// one forward output will generate one `shape` and `fill_constant`
// fluid.backward.gradients, one forward output will generate one `shape`
// and `fill_constant`
int64_t start_op_index = orig_end_op_index + (output_grad_vars.size() * 2);
int64_t end_op_index = block->OpSize();
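The start_op_index arithmetic above can be checked with concrete numbers. The values in the sketch below are assumptions chosen only for illustration, not taken from a real program.

// Illustrative arithmetic only; both values below are assumed.
#include <cstdint>
#include <iostream>

int main() {
  std::int64_t orig_end_op_index = 40;   // assumed: forward block ends at op 40
  std::int64_t output_grad_var_num = 3;  // assumed: 3 forward outputs receive grads
  // fluid.backward.gradients inserts one `shape` and one `fill_constant` op
  // per such output, so the real backward ops start 2 * 3 = 6 ops later.
  std::int64_t start_op_index = orig_end_op_index + output_grad_var_num * 2;
  std::cout << "backward ops start at index " << start_op_index << std::endl;  // 46
  return 0;
}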
@@ -295,7 +306,16 @@ class RunProgramGradOpKernel : public framework::OpKernel<T> {
out_scope_vec->size(), 1,
platform::errors::InvalidArgument(
"The OutScope of RunProgramGradOp should only hold one scope."));
auto &scope = *(out_scope_vec->front());
framework::Scope *global_inner_scope = out_scope_vec->front();
auto sub_scope_num = global_inner_scope->kids().size();
VLOG(2) << "The number of sub scopes before backward: " << sub_scope_num;
PADDLE_ENFORCE_GT(sub_scope_num, 0,
platform::errors::InvalidArgument(
"The OutScope of RunProgramGradOp should hold at "
"least one sub scope."));
auto &scope = *(global_inner_scope->kids().front());
// Step 2. prepare executor and scope
framework::Executor exe(ctx.GetPlace());
@@ -324,6 +344,11 @@ class RunProgramGradOpKernel : public framework::OpKernel<T> {
// Step 4. get outputs
details::ShareVarsFromScope(input_grad_vars, input_grad_var_names, &scope);
details::ShareVarsFromScope(param_grad_vars, param_grad_names, &scope);
// Step 5. Drop the current scope.
global_inner_scope->DeleteScope(&scope);
VLOG(2) << "The number of sub scopes after backward: "
<< global_inner_scope->kids().size();
}
};
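On the backward side, the new code requires at least one pending sub-scope, runs the gradient block in the front one, and deletes it afterwards. Below is a minimal stand-alone sketch of that bookkeeping, with a plain std::list standing in for global_inner_scope->kids(); it is an illustration, not Paddle code.

// Toy bookkeeping only; a std::list of step ids stands in for the kids list.
#include <cassert>
#include <iostream>
#include <list>

int main() {
  std::list<int> kids = {0, 1, 2};  // one entry per pending forward step
  // Mirrors PADDLE_ENFORCE_GT(sub_scope_num, 0, ...): backward must not run
  // without a pending forward scope.
  assert(!kids.empty() && "RunProgramGradOp needs at least one sub scope");
  std::cout << "sub scopes before backward: " << kids.size() << std::endl;  // 3
  int current = kids.front();  // scope used for this backward pass
  // ... run the backward block against `current`, then drop it, mirroring
  // global_inner_scope->DeleteScope(&scope).
  kids.pop_front();
  std::cout << "consumed scope of step " << current
            << "; sub scopes after backward: " << kids.size() << std::endl;  // 2
  return 0;
}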
......
@@ -112,7 +112,7 @@ def train(args, place, to_static):
state = to_variable(state)
state.stop_gradient = True
loss_probs = policy(state)
# print(loss_probs.name)
probs = loss_probs.numpy()
action, _mask = sample_action(probs[0])
@@ -166,10 +166,8 @@ def train(args, place, to_static):
running_reward = 10
for i_episode in itertools.count(1):
state, ep_reward = env.reset(), 0
# TODO(Aurelius84): In RL, we continuously select actions over multiple steps,
# then accumulate the loss to apply optimization. But currently all vars are shared
# within the same inner scope, which causes problems in backward. I will fix it in the next PR.
for t in range(1, 2): # default 1000
# The default loop number is 10000 in the models; we changed it to 1000 for a smaller test
for t in range(1, 1000):
state = np.array(state).astype("float32")
action, loss = select_action(state)
state, reward, done, _ = env.step(action)
@@ -203,7 +201,6 @@ class TestDeclarative(unittest.TestCase):
def setUp(self):
self.place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() \
else fluid.CPUPlace()
self.args = Args()
def test_train(self):
......