Unverified commit ca1185d0, authored by Aurelius84, committed by GitHub

[Dy2Stat] Fix scope in run_program_op (#25579)

* add reinforcement learning model test=develop

* align backward test=develop

* add gym in paddle_build.sh test=develop

* rm pip install in script test=develop

* refine paddle_build.sh test=develop

* fix sed error in macOS test=develop

* polish code test=develop

* fix scope problem

* refine code by reviewer comment
Parent commit: 2092660c
@@ -232,10 +232,15 @@ class RunProgramOpKernel : public framework::OpKernel<T> {
auto exe_ctx = exe.Prepare(*program, 0, skip_vars);
// get scope and clear old vars
framework::Scope &scope = *(out_scope_vec->front());
auto local_vars = scope.LocalVarNames();
scope.EraseVars(local_vars);
// NOTE(Aurelius84): While training some models, forward can be called many
// times before backpropagation is applied all at once, such as in
// Reinforcement Learning. The Tensor data of each step in multi-step training
// should be saved into its own scope. Otherwise, the gradients can be
// miscalculated, because backward would always use the Tensor data of the
// last forward step.
framework::Scope *global_inner_scope = out_scope_vec->front();
VLOG(2) << "The number of sub scopes before forward: "
<< out_scope_vec->front()->kids().size();
framework::Scope &scope = global_inner_scope->NewScope();
// share input_vars & parameters into scope
details::ShareVarsIntoScope(input_vars, input_var_names, &scope);
@@ -251,6 +256,12 @@ class RunProgramOpKernel : public framework::OpKernel<T> {
// Debug info: scope info when run end
VLOG(3) << framework::GenScopeTreeDebugInfo(out_scope_vec->front());
// Step 5. Drop all child scopes while testing.
if (is_test) {
out_scope_vec->front()->DropKids();
}
VLOG(2) << "The number of sub scopes after forward: "
<< out_scope_vec->front()->kids().size();
}
};
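The forward-side change above is easier to see with a toy model. The sketch below is a stand-alone mock, not the real framework::Scope: it only mirrors the NewScope/DropKids calls from this diff to show why one child scope per forward call keeps each step's tensors alive until backward.

// Toy model only (names mirror the diff; this is NOT framework::Scope).
#include <iostream>
#include <list>
#include <map>
#include <string>

struct Scope {
  std::map<std::string, float> vars;  // simplified stand-in for Variables
  std::list<Scope *> kids;

  Scope *NewScope() {  // like framework::Scope::NewScope()
    kids.push_back(new Scope());
    return kids.back();
  }
  void DropKids() {  // like framework::Scope::DropKids()
    for (Scope *k : kids) delete k;
    kids.clear();
  }
  ~Scope() { DropKids(); }
};

int main() {
  Scope global_inner_scope;
  const bool is_test = false;

  // Forward runs several times before any backward (e.g. an RL roll-out).
  // Each call writes its outputs into its own child scope, so step 0's data
  // is no longer overwritten by step 2's.
  for (int step = 0; step < 3; ++step) {
    Scope *step_scope = global_inner_scope.NewScope();
    step_scope->vars["out"] = static_cast<float>(step);
    if (is_test) {
      global_inner_scope.DropKids();  // inference: no backward, free at once
    }
  }
  std::cout << "sub scopes after forward: " << global_inner_scope.kids.size()
            << std::endl;  // prints 3
  return 0;
}

With the old single-scope approach (EraseVars on every call), step 2's write would replace step 0's, and backward would always read the last step's tensors.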
@@ -285,8 +296,8 @@ class RunProgramGradOpKernel : public framework::OpKernel<T> {
auto orig_end_op_index = ctx.Attr<int64_t>("end_op_index");
// NOTE: skip `shape` and `fill_constant` op created by
// fluid.backward.gradients,
// one forward output will generate one `shape` and `fill_constant`
// fluid.backward.gradients, one forward output will generate one `shape`
// and `fill_constant`
int64_t start_op_index = orig_end_op_index + (output_grad_vars.size() * 2);
int64_t end_op_index = block->OpSize();
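The start_op_index arithmetic above can be checked with concrete numbers. The values in the sketch below are assumptions chosen only for illustration, not taken from a real program.

// Illustrative arithmetic only; both values below are assumed.
#include <cstdint>
#include <iostream>

int main() {
  std::int64_t orig_end_op_index = 40;   // assumed: forward block ends at op 40
  std::int64_t output_grad_var_num = 3;  // assumed: 3 forward outputs receive grads
  // fluid.backward.gradients inserts one `shape` and one `fill_constant` op
  // per such output, so the real backward ops start 2 * 3 = 6 ops later.
  std::int64_t start_op_index = orig_end_op_index + output_grad_var_num * 2;
  std::cout << "backward ops start at index " << start_op_index << std::endl;  // 46
  return 0;
}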
@@ -295,7 +306,16 @@ class RunProgramGradOpKernel : public framework::OpKernel<T> {
out_scope_vec->size(), 1,
platform::errors::InvalidArgument(
"The OutScope of RunProgramGradOp should only hold one scope."));
auto &scope = *(out_scope_vec->front());
framework::Scope *global_inner_scope = out_scope_vec->front();
auto sub_scope_num = global_inner_scope->kids().size();
VLOG(2) << "The number of sub scopes before backward: " << sub_scope_num;
PADDLE_ENFORCE_GT(sub_scope_num, 0,
platform::errors::InvalidArgument(
"The OutScope of RunProgramGradOp should hold at "
"least one sub scope."));
auto &scope = *(global_inner_scope->kids().front());
// Step 2. prepare executor and scope
framework::Executor exe(ctx.GetPlace());
@@ -324,6 +344,11 @@ class RunProgramGradOpKernel : public framework::OpKernel<T> {
// Step 4. get outputs
details::ShareVarsFromScope(input_grad_vars, input_grad_var_names, &scope);
details::ShareVarsFromScope(param_grad_vars, param_grad_names, &scope);
// Step 5. Drop the current scope.
global_inner_scope->DeleteScope(&scope);
VLOG(2) << "The number of sub scopes after backward: "
<< global_inner_scope->kids().size();
}
};
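On the backward side, the new code requires at least one pending sub-scope, runs the gradient block in the front one, and deletes it afterwards. Below is a minimal stand-alone sketch of that bookkeeping, with a plain std::list standing in for global_inner_scope->kids(); it is an illustration, not Paddle code.

// Toy bookkeeping only; a std::list of step ids stands in for the kids list.
#include <cassert>
#include <iostream>
#include <list>

int main() {
  std::list<int> kids = {0, 1, 2};  // one entry per pending forward step
  // Mirrors PADDLE_ENFORCE_GT(sub_scope_num, 0, ...): backward must not run
  // without a pending forward scope.
  assert(!kids.empty() && "RunProgramGradOp needs at least one sub scope");
  std::cout << "sub scopes before backward: " << kids.size() << std::endl;  // 3
  int current = kids.front();  // scope used for this backward pass
  // ... run the backward block against `current`, then drop it, mirroring
  // global_inner_scope->DeleteScope(&scope).
  kids.pop_front();
  std::cout << "consumed scope of step " << current
            << "; sub scopes after backward: " << kids.size() << std::endl;  // 2
  return 0;
}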
......
@@ -112,7 +112,7 @@ def train(args, place, to_static):
state = to_variable(state)
state.stop_gradient = True
loss_probs = policy(state)
# print(loss_probs.name)
probs = loss_probs.numpy()
action, _mask = sample_action(probs[0])
@@ -166,10 +166,8 @@ def train(args, place, to_static):
running_reward = 10
for i_episode in itertools.count(1):
state, ep_reward = env.reset(), 0
# TODO(Aurelius84): In RL, we continuously select actions over multiple steps,
# then accumulate the loss to apply optimization. But currently all vars are shared
# within the same inner scope, which causes problems in backward. I will fix it in the next PR.
for t in range(1, 2): # default 1000
# The default loop number is 10000 in the models; we changed it to 1000 for a smaller test
for t in range(1, 1000):
state = np.array(state).astype("float32")
action, loss = select_action(state)
state, reward, done, _ = env.step(action)
@@ -203,7 +201,6 @@ class TestDeclarative(unittest.TestCase):
def setUp(self):
self.place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() \
else fluid.CPUPlace()
self.args = Args()
def test_train(self):
......