Created by: liym27
PR types
Bug fixes
PR changes
OPs
Describe
Fix find_op_path for the while sub-block when appending backward.
- Before: OPs in the while sub-block were not pruned when constructing the backward OP path, which could cause X@GRAD to be used before X@GRAD is created.
- After: OPs that are not related to the loss are pruned from the while sub-block when constructing the backward OP path.
Background
- The error: when building the backward pass of a while_loop, the sub-block may contain a backward OP whose input x@GRAD comes from the parent block and is both an input and an output of while_grad. If x@GRAD is never initialized in the parent block, execution fails.
- How this shows up in a model: some of the input/output variables of the while_loop are unrelated to the loss.
- When appending backward, the original framework did not fully account for pruning OPs inside control-flow sub-blocks; the pruning idea is sketched right after this list.
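The sketch below illustrates the pruning idea on a plain Python model of a sub-block: starting from the gradient variables that are actually related to the loss, walk the sub-block ops in reverse and keep only those whose outputs feed that set. The dict-based op representation and the helper name prune_sub_block_ops are hypothetical, standing in for the logic this PR adds to find_op_path; they are not the real framework API.

# Minimal sketch, NOT the real Paddle API: each op is modeled as a dict with
# 'type', 'inputs' and 'outputs' lists of variable names.
def prune_sub_block_ops(sub_block_ops, relevant_vars):
    """Keep only ops whose outputs are (transitively) needed by relevant_vars.

    relevant_vars starts as the set of sub-block gradient variables that are
    related to the loss (e.g. the grads the parent while_grad really consumes).
    Walking the op list in reverse propagates relevance from outputs to inputs.
    """
    relevant_vars = set(relevant_vars)
    kept = []
    for op in reversed(sub_block_ops):
        if relevant_vars & set(op['outputs']):
            kept.append(op)
            relevant_vars.update(op['inputs'])
    kept.reverse()
    return kept

# Hypothetical toy sub-block mirroring the example below: x is related to the
# loss, i is not, so the op that only produces i-related grads (and would read
# a gradient variable that is never created) is dropped.
ops = [
    {'type': 'assign_grad', 'inputs': ['x@GRAD'], 'outputs': ['tmp_2@GRAD']},
    {'type': 'elementwise_add_grad', 'inputs': ['tmp_2@GRAD', 'x', 'i'],
     'outputs': ['x@GRAD', 'i@GRAD']},
    {'type': 'scale_grad', 'inputs': ['tmp_3@GRAD'], 'outputs': ['i@GRAD']},
]
print([op['type'] for op in prune_sub_block_ops(ops, {'x@GRAD'})])
# -> ['assign_grad', 'elementwise_add_grad']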
Sample code
import paddle.fluid.layers as layers
import paddle.fluid as fluid
import numpy as np

# x is related to the loss; i is only used as the loop counter.
x = fluid.data(name='x', shape=[1], dtype='float32')
x.stop_gradient = False
i = fluid.data(name='i', shape=[1], dtype='float32')
i.stop_gradient = False
# `five` is not used by cond (which compares against the literal 5), but it is
# kept here because it shows up as fill_constant_0.tmp_0 in the program below.
five = layers.fill_constant(shape=[1], dtype='float32', value=5)

feed_x = np.ones(1).astype('float32')
feed_i = np.zeros(1).astype('float32')

def cond(i, x):
    return i < 5

def body(i, x):
    x = x + i
    i = i + 1
    return i, x

out = layers.while_loop(cond, body, [i, x])

# The loss only depends on x, so i and its gradient are unrelated to the loss.
mean = fluid.layers.mean(x)
fluid.backward.append_backward(mean)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())
res = exe.run(fluid.default_main_program(),
              feed={'x': feed_x, 'i': feed_i},
              fetch_list=[i.grad_name, x])
print(fluid.default_main_program())
print('i_grad value is:', res[0])
print('x value is:', res[1])
Results
- Before: the run fails with the error below.
--------------------------------------------
C++ Call Stacks (More useful to developers):
--------------------------------------------
0 std::string paddle::platform::GetTraceBackString<std::string const&>(std::string const&&&, char const*, int)
1 paddle::framework::Tensor::check_memory_size() const
2 paddle::framework::Tensor::ShareDataWith(paddle::framework::Tensor const&)
3 paddle::operators::WhileGradOp::RunImpl(paddle::framework::Scope const&, paddle::platform::Place const&) const
4 paddle::framework::OperatorBase::Run(paddle::framework::Scope const&, paddle::platform::Place const&)
5 paddle::framework::Executor::RunPartialPreparedContext(paddle::framework::ExecutorPrepareContext*, paddle::framework::Scope*, long long, long long, bool, bool, bool)
6 paddle::framework::Executor::Run(paddle::framework::ProgramDesc const&, paddle::framework::Scope*, int, bool, bool, std::__1::vector<std::string, std::__1::allocator<std::string > > const&, bool, bool)
7 void pybind11::cpp_function::initialize<paddle::pybind::pybind11_init_core_avx(pybind11::module&)::$_111, void, paddle::framework::Executor&, paddle::framework::ProgramDesc const&, paddle::framework::Scope*, int, bool, bool, std::__1::vector<std::string, std::__1::allocator<std::string > > const&, pybind11::name, pybind11::is_method, pybind11::sibling>(paddle::pybind::pybind11_init_core_avx(pybind11::module&)::$_111&&, void (*)(paddle::framework::Executor&, paddle::framework::ProgramDesc const&, paddle::framework::Scope*, int, bool, bool, std::__1::vector<std::string, std::__1::allocator<std::string > > const&), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&)::'lambda'(pybind11::detail::function_call&)::__invoke(pybind11::detail::function_call&)
8 pybind11::cpp_function::dispatcher(_object*, _object*, _object*)
----------------------
Error Message Summary:
----------------------
Error: Tensor holds no memory. Call Tensor::mutable_data first.
[Hint: holder_ should not be null.] at (/home/teamcity/buildAgent/work/1ec40e2d88fa641/paddle/fluid/framework/tensor.cc:23)
- After: the run succeeds; the pruned program and the fetched values are shown below.
{ // block 0
var x : fluid.VarType.LOD_TENSOR.shape(1L,).astype(VarType.FP32)
var i : fluid.VarType.LOD_TENSOR.shape(1L,).astype(VarType.FP32)
var fill_constant_0.tmp_0 : fluid.VarType.LOD_TENSOR.shape(1L,).astype(VarType.FP32)
var tmp_0 : fluid.VarType.LOD_TENSOR.shape(1L,).astype(VarType.FP32)
var tmp_1 : fluid.VarType.LOD_TENSOR.shape(1L,).astype(VarType.BOOL)
var _generated_var_0 : fluid.VarType.STEP_SCOPES)
var mean_0.tmp_0 : fluid.VarType.LOD_TENSOR.shape(1L,).astype(VarType.FP32)
var i@GRAD : fluid.VarType.LOD_TENSOR.shape(1L,).astype(VarType.FP32)
var x@GRAD : fluid.VarType.LOD_TENSOR.shape(1L,).astype(VarType.FP32)
var mean_0.tmp_0@GRAD : fluid.VarType.LOD_TENSOR.shape(1L,).astype(VarType.FP32)
{Out=[u'fill_constant_0.tmp_0']} = fill_constant(inputs={ShapeTensor=[], ShapeTensorList=[], ValueTensor=[]}, dtype = 5, force_cpu = False, op_device = , op_namescope = /, op_role = 0, op_role_var = [], shape = [1L], str_value = 5.0, value = 5.0)
{Out=[u'tmp_0']} = fill_constant(inputs={}, dtype = 5, force_cpu = False, op_device = , op_namescope = /, op_role = 0, op_role_var = [], shape = [1L], str_value = , value = 5.0)
{Out=[u'tmp_1']} = less_than(inputs={X=[u'i'], Y=[u'tmp_0']}, axis = -1, force_cpu = False, op_device = , op_namescope = /, op_role = 0, op_role_var = [])
{Out=[u'i', u'tmp_1', u'x'], StepScopes=[u'_generated_var_0']} = while(inputs={Condition=[u'tmp_1'], X=[u'i', u'x']}, is_test = False, op_device = , op_namescope = /, op_role = 0, op_role_var = [], skip_eager_deletion_vars = [], sub_block = block[1])
{Out=[u'mean_0.tmp_0']} = mean(inputs={X=[u'x']}, op_device = , op_namescope = /, op_role = 256, op_role_var = [])
{Out=[u'mean_0.tmp_0@GRAD']} = fill_constant(inputs={}, dtype = 5, force_cpu = False, op_device = , op_role = 257, shape = [1L], value = 1.0)
{X@GRAD=[u'x@GRAD']} = mean_grad(inputs={Out@GRAD=[u'mean_0.tmp_0@GRAD'], X=[u'x']}, op_device = , op_role = 1)
{X@GRAD=[u'i@GRAD', u'x@GRAD']} = while_grad(inputs={Out=[u'i', u'tmp_1', u'x'], Out@GRAD=[u'x@GRAD'], StepScopes=[u'_generated_var_0'], X=[u'i', u'x']}, is_test = False, op_device = , op_namescope = /, op_role = 1, op_role_var = [], original_output_grad = [u'x@GRAD'], skip_eager_deletion_vars = [], sub_block = block[2])
}
{ // block 1
var tmp_2 : fluid.VarType.LOD_TENSOR.shape(1L,).astype(VarType.FP32)
var tmp_3 : fluid.VarType.LOD_TENSOR.shape(1L,).astype(VarType.FP32)
var tmp_4 : fluid.VarType.LOD_TENSOR.shape(1L,).astype(VarType.FP32)
var tmp_5 : fluid.VarType.LOD_TENSOR.shape(1L,).astype(VarType.BOOL)
{Out=[u'tmp_2']} = elementwise_add(inputs={X=[u'x'], Y=[u'i']}, Scale_out = 1.0, Scale_x = 1.0, Scale_y = 1.0, axis = -1, op_device = , op_namescope = /, op_role = 0, op_role_var = [], use_mkldnn = False, use_quantizer = False, x_data_format = , y_data_format = )
{Out=[u'tmp_3']} = scale(inputs={ScaleTensor=[], X=[u'i']}, bias = 1.0, bias_after_scale = True, op_device = , op_namescope = /, op_role = 0, op_role_var = [], scale = 1.0)
{Out=[u'tmp_4']} = fill_constant(inputs={}, dtype = 5, force_cpu = False, op_device = , op_namescope = /, op_role = 0, op_role_var = [], shape = [1L], str_value = , value = 5.0)
{Out=[u'tmp_5']} = less_than(inputs={X=[u'tmp_3'], Y=[u'tmp_4']}, axis = -1, force_cpu = False, op_device = , op_namescope = /, op_role = 0, op_role_var = [])
{Out=[u'i']} = assign(inputs={X=[u'tmp_3']}, op_device = , op_namescope = /, op_role = 0, op_role_var = [])
{Out=[u'x']} = assign(inputs={X=[u'tmp_2']}, op_device = , op_namescope = /, op_role = 0, op_role_var = [])
{Out=[u'tmp_1']} = assign(inputs={X=[u'tmp_5']}, op_device = , op_namescope = /, op_role = 0, op_role_var = [])
}
{ // block 2
var i@GRAD : fluid.VarType.LOD_TENSOR.shape(1L,).astype(VarType.FP32)
var tmp_2@GRAD : fluid.VarType.LOD_TENSOR.shape(1L,).astype(VarType.FP32)
{Out=[u'tmp_2@GRAD']} = assign(inputs={X=[u'x@GRAD']}, op_device = , op_role = 1)
{X@GRAD=[u'x@GRAD'], Y@GRAD=[u'i@GRAD']} = elementwise_add_grad(inputs={Out@GRAD=[u'tmp_2@GRAD'], X=[u'x'], Y=[u'i']}, Scale_out = 1.0, Scale_x = 1.0, Scale_y = 1.0, axis = -1, op_device = , op_namescope = /, op_role = 1, op_role_var = [], use_mkldnn = False, use_quantizer = False, x_data_format = , y_data_format = )
}
('i_grad value is:', array([1.], dtype=float32))
('x value is:', array([11.], dtype=float32))