Created by: liym27
PR types
Bug fixes
PR changes
OPs
Describe
Fix find_op_path for the while sub-block when appending backward.
- Before: OPs in the while sub-block were not pruned when constructing the backward OP path, which could cause X@GRAD to be used before X@GRAD is created.
- After: OPs that are not related to the loss are pruned from the while sub-block when constructing the backward OP path.
Background
- The error: when building the backward pass of a while_loop, the sub-block may contain a backward OP whose input x@GRAD comes from the parent block and is both an input and an output of while_grad. If x@GRAD is never initialized in the parent block, execution fails.
- How this shows up in a model: some of the input/output variables of the while_loop are unrelated to the loss.
- When appending backward, the original framework did not fully account for pruning OPs inside control-flow sub-blocks; the pruning idea is sketched right after this list.
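The sketch below illustrates the pruning idea on a plain Python model of a sub-block: starting from the gradient variables that are actually related to the loss, walk the sub-block ops in reverse and keep only those whose outputs feed that set. The dict-based op representation and the helper name prune_sub_block_ops are hypothetical, standing in for the logic this PR adds to find_op_path; they are not the real framework API.

# Minimal sketch, NOT the real Paddle API: each op is modeled as a dict with
# 'type', 'inputs' and 'outputs' lists of variable names.
def prune_sub_block_ops(sub_block_ops, relevant_vars):
    """Keep only ops whose outputs are (transitively) needed by relevant_vars.

    relevant_vars starts as the set of sub-block gradient variables that are
    related to the loss (e.g. the grads the parent while_grad really consumes).
    Walking the op list in reverse propagates relevance from outputs to inputs.
    """
    relevant_vars = set(relevant_vars)
    kept = []
    for op in reversed(sub_block_ops):
        if relevant_vars & set(op['outputs']):
            kept.append(op)
            relevant_vars.update(op['inputs'])
    kept.reverse()
    return kept

# Hypothetical toy sub-block mirroring the example below: x is related to the
# loss, i is not, so the op that only produces i-related grads (and would read
# a gradient variable that is never created) is dropped.
ops = [
    {'type': 'assign_grad', 'inputs': ['x@GRAD'], 'outputs': ['tmp_2@GRAD']},
    {'type': 'elementwise_add_grad', 'inputs': ['tmp_2@GRAD', 'x', 'i'],
     'outputs': ['x@GRAD', 'i@GRAD']},
    {'type': 'scale_grad', 'inputs': ['tmp_3@GRAD'], 'outputs': ['i@GRAD']},
]
print([op['type'] for op in prune_sub_block_ops(ops, {'x@GRAD'})])
# -> ['assign_grad', 'elementwise_add_grad']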
Sample code
import paddle.fluid.layers as layers
import paddle.fluid as fluid
import numpy as np

# x is related to the loss; i is only used as the loop counter.
x = fluid.data(name='x', shape=[1], dtype='float32')
x.stop_gradient = False
i = fluid.data(name='i', shape=[1], dtype='float32')
i.stop_gradient = False
# `five` is not used by cond (which compares against the literal 5), but it is
# kept here because it shows up as fill_constant_0.tmp_0 in the program below.
five = layers.fill_constant(shape=[1], dtype='float32', value=5)

feed_x = np.ones(1).astype('float32')
feed_i = np.zeros(1).astype('float32')

def cond(i, x):
    return i < 5

def body(i, x):
    x = x + i
    i = i + 1
    return i, x

out = layers.while_loop(cond, body, [i, x])

# The loss only depends on x, so i and its gradient are unrelated to the loss.
mean = fluid.layers.mean(x)
fluid.backward.append_backward(mean)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())
res = exe.run(fluid.default_main_program(),
              feed={'x': feed_x, 'i': feed_i},
              fetch_list=[i.grad_name, x])
print(fluid.default_main_program())
print('i_grad value is:', res[0])
print('x value is:', res[1])
Results
- Before: the run fails with the error below.
--------------------------------------------
C++ Call Stacks (More useful to developers):
--------------------------------------------
0 std::string paddle::platform::GetTraceBackString<std::string const&>(std::string const&&&, char const*, int)
1 paddle::framework::Tensor::check_memory_size() const
2 paddle::framework::Tensor::ShareDataWith(paddle::framework::Tensor const&)
3 paddle::operators::WhileGradOp::RunImpl(paddle::framework::Scope const&, paddle::platform::Place const&) const
4 paddle::framework::OperatorBase::Run(paddle::framework::Scope const&, paddle::platform::Place const&)
5 paddle::framework::Executor::RunPartialPreparedContext(paddle::framework::ExecutorPrepareContext*, paddle::framework::Scope*, long long, long long, bool, bool, bool)
6 paddle::framework::Executor::Run(paddle::framework::ProgramDesc const&, paddle::framework::Scope*, int, bool, bool, std::__1::vector<std::string, std::__1::allocator<std::string > > const&, bool, bool)
7 void pybind11::cpp_function::initialize<paddle::pybind::pybind11_init_core_avx(pybind11::module&)::$_111, void, paddle::framework::Executor&, paddle::framework::ProgramDesc const&, paddle::framework::Scope*, int, bool, bool, std::__1::vector<std::string, std::__1::allocator<std::string > > const&, pybind11::name, pybind11::is_method, pybind11::sibling>(paddle::pybind::pybind11_init_core_avx(pybind11::module&)::$_111&&, void (*)(paddle::framework::Executor&, paddle::framework::ProgramDesc const&, paddle::framework::Scope*, int, bool, bool, std::__1::vector<std::string, std::__1::allocator<std::string > > const&), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&)::'lambda'(pybind11::detail::function_call&)::__invoke(pybind11::detail::function_call&)
8 pybind11::cpp_function::dispatcher(_object*, _object*, _object*)
----------------------
Error Message Summary:
----------------------
Error: Tensor holds no memory. Call Tensor::mutable_data first.
[Hint: holder_ should not be null.] at (/home/teamcity/buildAgent/work/1ec40e2d88fa641/paddle/fluid/framework/tensor.cc:23)
- After: the run succeeds; the pruned program and the fetched values are shown below.
{ // block 0
var x : fluid.VarType.LOD_TENSOR.shape(1L,).astype(VarType.FP32)
var i : fluid.VarType.LOD_TENSOR.shape(1L,).astype(VarType.FP32)
var fill_constant_0.tmp_0 : fluid.VarType.LOD_TENSOR.shape(1L,).astype(VarType.FP32)
var tmp_0 : fluid.VarType.LOD_TENSOR.shape(1L,).astype(VarType.FP32)
var tmp_1 : fluid.VarType.LOD_TENSOR.shape(1L,).astype(VarType.BOOL)
var _generated_var_0 : fluid.VarType.STEP_SCOPES)
var mean_0.tmp_0 : fluid.VarType.LOD_TENSOR.shape(1L,).astype(VarType.FP32)
var i@GRAD : fluid.VarType.LOD_TENSOR.shape(1L,).astype(VarType.FP32)
var x@GRAD : fluid.VarType.LOD_TENSOR.shape(1L,).astype(VarType.FP32)
var mean_0.tmp_0@GRAD : fluid.VarType.LOD_TENSOR.shape(1L,).astype(VarType.FP32)
{Out=[u'fill_constant_0.tmp_0']} = fill_constant(inputs={ShapeTensor=[], ShapeTensorList=[], ValueTensor=[]}, dtype = 5, force_cpu = False, op_device = , op_namescope = /, op_role = 0, op_role_var = [], shape = [1L], str_value = 5.0, value = 5.0)
{Out=[u'tmp_0']} = fill_constant(inputs={}, dtype = 5, force_cpu = False, op_device = , op_namescope = /, op_role = 0, op_role_var = [], shape = [1L], str_value = , value = 5.0)
{Out=[u'tmp_1']} = less_than(inputs={X=[u'i'], Y=[u'tmp_0']}, axis = -1, force_cpu = False, op_device = , op_namescope = /, op_role = 0, op_role_var = [])
{Out=[u'i', u'tmp_1', u'x'], StepScopes=[u'_generated_var_0']} = while(inputs={Condition=[u'tmp_1'], X=[u'i', u'x']}, is_test = False, op_device = , op_namescope = /, op_role = 0, op_role_var = [], skip_eager_deletion_vars = [], sub_block = block[1])
{Out=[u'mean_0.tmp_0']} = mean(inputs={X=[u'x']}, op_device = , op_namescope = /, op_role = 256, op_role_var = [])
{Out=[u'mean_0.tmp_0@GRAD']} = fill_constant(inputs={}, dtype = 5, force_cpu = False, op_device = , op_role = 257, shape = [1L], value = 1.0)
{X@GRAD=[u'x@GRAD']} = mean_grad(inputs={Out@GRAD=[u'mean_0.tmp_0@GRAD'], X=[u'x']}, op_device = , op_role = 1)
{X@GRAD=[u'i@GRAD', u'x@GRAD']} = while_grad(inputs={Out=[u'i', u'tmp_1', u'x'], Out@GRAD=[u'x@GRAD'], StepScopes=[u'_generated_var_0'], X=[u'i', u'x']}, is_test = False, op_device = , op_namescope = /, op_role = 1, op_role_var = [], original_output_grad = [u'x@GRAD'], skip_eager_deletion_vars = [], sub_block = block[2])
}
{ // block 1
var tmp_2 : fluid.VarType.LOD_TENSOR.shape(1L,).astype(VarType.FP32)
var tmp_3 : fluid.VarType.LOD_TENSOR.shape(1L,).astype(VarType.FP32)
var tmp_4 : fluid.VarType.LOD_TENSOR.shape(1L,).astype(VarType.FP32)
var tmp_5 : fluid.VarType.LOD_TENSOR.shape(1L,).astype(VarType.BOOL)
{Out=[u'tmp_2']} = elementwise_add(inputs={X=[u'x'], Y=[u'i']}, Scale_out = 1.0, Scale_x = 1.0, Scale_y = 1.0, axis = -1, op_device = , op_namescope = /, op_role = 0, op_role_var = [], use_mkldnn = False, use_quantizer = False, x_data_format = , y_data_format = )
{Out=[u'tmp_3']} = scale(inputs={ScaleTensor=[], X=[u'i']}, bias = 1.0, bias_after_scale = True, op_device = , op_namescope = /, op_role = 0, op_role_var = [], scale = 1.0)
{Out=[u'tmp_4']} = fill_constant(inputs={}, dtype = 5, force_cpu = False, op_device = , op_namescope = /, op_role = 0, op_role_var = [], shape = [1L], str_value = , value = 5.0)
{Out=[u'tmp_5']} = less_than(inputs={X=[u'tmp_3'], Y=[u'tmp_4']}, axis = -1, force_cpu = False, op_device = , op_namescope = /, op_role = 0, op_role_var = [])
{Out=[u'i']} = assign(inputs={X=[u'tmp_3']}, op_device = , op_namescope = /, op_role = 0, op_role_var = [])
{Out=[u'x']} = assign(inputs={X=[u'tmp_2']}, op_device = , op_namescope = /, op_role = 0, op_role_var = [])
{Out=[u'tmp_1']} = assign(inputs={X=[u'tmp_5']}, op_device = , op_namescope = /, op_role = 0, op_role_var = [])
}
{ // block 2
var i@GRAD : fluid.VarType.LOD_TENSOR.shape(1L,).astype(VarType.FP32)
var tmp_2@GRAD : fluid.VarType.LOD_TENSOR.shape(1L,).astype(VarType.FP32)
{Out=[u'tmp_2@GRAD']} = assign(inputs={X=[u'x@GRAD']}, op_device = , op_role = 1)
{X@GRAD=[u'x@GRAD'], Y@GRAD=[u'i@GRAD']} = elementwise_add_grad(inputs={Out@GRAD=[u'tmp_2@GRAD'], X=[u'x'], Y=[u'i']}, Scale_out = 1.0, Scale_x = 1.0, Scale_y = 1.0, axis = -1, op_device = , op_namescope = /, op_role = 1, op_role_var = [], use_mkldnn = False, use_quantizer = False, x_data_format = , y_data_format = )
}
('i_grad value is:', array([1.], dtype=float32))
('x value is:', array([11.], dtype=float32))