Unverified commit 91298884, authored by xiongkun, committed by GitHub

[ Dy2static ] select input fix and while_op memory bug fixed. (#45380)

* while support for python containers.
  This makes it more convenient to convert dynamic graph code into static graphs.

* cond support for python containers

* 1. make select_input's output shape follow inputs[1]
  2. add a warning for the risky assign in while_loop

* fix 2 problems in GPT export:
  1. a bug in while_op no_need_copy_var, which causes GPU memory leakage
  2. a bug in undefined_var where stop_gradient should be False.

* rename per code review

* format
Parent: f4193eac
@@ -86,6 +86,19 @@ class WhileOp : public framework::OperatorBase {
     std::set<std::string> no_copy_var_names;
     if (!is_test) {
+      // set all persistable parameters into no_copy_var_names.
+      auto *global_block = block;
+      while (global_block->ID() != 0)
+        global_block = global_block->ParentBlock();
+      auto all_vars = global_block->AllVars();
+      std::for_each(all_vars.begin(),
+                    all_vars.end(),
+                    [&no_copy_var_names](framework::VarDesc *var) {
+                      if (var->IsParameter())
+                        no_copy_var_names.insert(var->Name());
+                    });
       const std::vector<framework::OpDesc *> &all_ops = block->AllOps();
       for (const framework::OpDesc *op : all_ops) {
         const framework::VariableNameMap &input_var_names = op->Inputs();
...
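For reference, a minimal Python sketch of the same idea using Paddle's public Block/Program API (the helper name is hypothetical and only mirrors the C++ logic above): starting from the while block, walk up to the global block and collect the names of all parameters, which are persistable and therefore should not be copied on every loop iteration.

```python
# Hypothetical illustration of collecting no_copy_var_names; not part of the patch.
import paddle

paddle.enable_static()

def collect_no_copy_param_names(block):
    """Walk from `block` up to the global block (idx 0) and gather parameter names."""
    cur = block
    while cur.idx != 0:
        cur = cur.program.block(cur.parent_idx)
    # Parameters are persistable; treating them as no-copy avoids re-allocating
    # their buffers in every while iteration (the reported GPU memory leak).
    return {p.name for p in cur.all_parameters()}

main = paddle.static.default_main_program()
print(collect_no_copy_param_names(main.current_block()))  # empty set for an empty program
```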
@@ -148,7 +148,12 @@ def create_undefined_var_like(variable):
     from paddle.fluid.dygraph.dygraph_to_static.return_transformer import RETURN_NO_VALUE_MAGIC_NUM
     var = data_layer_not_check(unique_name.generate("undefined_var"),
                                variable.shape, variable.dtype)
+    var.stop_gradient = False
+    helper = LayerHelper('create_undefined_var_like', **locals())
+    saved_block_ids = helper.main_program.current_block_idx
+    helper.main_program.current_block_idx = 0
     assign(RETURN_NO_VALUE_MAGIC_NUM, var)
+    helper.main_program.current_block_idx = saved_block_ids
     return var
@@ -156,6 +161,7 @@ def create_undefined_variable():
     from paddle.fluid.dygraph.dygraph_to_static.return_transformer import RETURN_NO_VALUE_MAGIC_NUM
     var = data_layer_not_check(unique_name.generate("undefined_var"), [1],
                                "float64")
+    var.stop_gradient = False
     # the variable is created in block(0), we append assign in block(0) either.
     helper = LayerHelper('create_undefined_variable', **locals())
     saved_block_ids = helper.main_program.current_block_idx
...
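A hedged usage sketch of why stop_gradient must be False on these placeholders (public dy2static API only; the function below is illustrative, not from this patch): when one branch of a converted `if` returns early, the other path's return slot is filled with an undefined-variable placeholder, and select_input later picks between the two. If the placeholder were stop_gradient=True, gradients through the selected output could be cut off.

```python
# Illustrative scenario: an early return in one branch triggers undefined-variable
# placeholders during dynamic-to-static conversion.
import paddle

@paddle.jit.to_static
def branchy(x):
    if paddle.mean(x) > 0:
        return x * 2      # early return: the other path gets an undefined placeholder
    return x + 1

x = paddle.to_tensor([1.0, -2.0], stop_gradient=False)
loss = paddle.mean(branchy(x))
loss.backward()           # differentiable because the placeholder is not stop_gradient
```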
@@ -89,9 +89,9 @@ def select_input(inputs, mask):
     check_type(inputs, 'inputs', (list, tuple), 'select_input')
     check_variable_and_dtype(mask, 'mask', ['int32'], 'select_input')
-    input_dtype = inputs[0].dtype
-    input_shape = inputs[0].shape
-    input_type = inputs[0].type
+    input_dtype = inputs[1].dtype
+    input_shape = inputs[1].shape
+    input_type = inputs[1].type
     out = helper.create_variable(dtype=input_dtype,
                                  shape=input_shape,
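The effect of switching from inputs[0] to inputs[1], as a small hedged example (the import path is assumed from the file this hunk edits; the shapes are made up): in dy2static, inputs[0] is often an undefined placeholder with shape [1], while inputs[1] carries the real branch value, so the output variable should inherit its metadata from inputs[1].

```python
import paddle
from paddle.fluid.layers.control_flow import select_input

paddle.enable_static()
placeholder = paddle.static.data('placeholder', shape=[1], dtype='float32')
real_value = paddle.static.data('real_value', shape=[4, 16], dtype='float32')
mask = paddle.full(shape=[1], fill_value=1, dtype='int32')

out = select_input([placeholder, real_value], mask)
# After this change, out.shape follows inputs[1]: (4, 16) instead of (1,).
print(out.shape)
```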
@@ -1190,6 +1190,13 @@ def assign_skip_lod_tensor_array(input, output):
     """
     Assign input to output, but skip the process of copying LoDTensorArray unless it's created in while_block.
     """
+
+    def has_shape_diff(x_var, y_var):
+        if len(x_var.shape) != len(y_var.shape): return True
+        for x_dim, y_dim in zip(x_var.shape, y_var.shape):
+            if x_dim != y_dim and -1 not in [x_dim, y_dim]: return True
+        return False
+
     if not isinstance(input, (Variable, core.VarBase)):
         if isinstance(output, Variable) and isinstance(
                 input, support_ret_buildin_type):
@@ -1205,6 +1212,11 @@ def assign_skip_lod_tensor_array(input, output):
     if parent_block and not parent_block._find_var_recursive(input.name):
         assign(input, output)
     else:
+        if isinstance(output, Variable) and isinstance(
+                input, Variable) and has_shape_diff(input, output):
+            warnings.warn(
+                "In dy2static mode, we attempt to assign a variable with shape {} into a variable with shape {}, which is not always right."
+                .format(input.shape, output.shape))
         assign(input, output)
...
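To make the warning condition concrete, here is a standalone restatement of the has_shape_diff helper operating on plain shape lists (the function name is illustrative): shapes only count as different when the ranks differ or a pair of known (non -1) dimensions disagree, so dynamic dimensions never trigger the warning.

```python
def shapes_differ(x_shape, y_shape):
    """Mirror of has_shape_diff above, taking shapes directly instead of variables."""
    if len(x_shape) != len(y_shape):
        return True
    for x_dim, y_dim in zip(x_shape, y_shape):
        if x_dim != y_dim and -1 not in [x_dim, y_dim]:
            return True
    return False

assert shapes_differ([2, 3], [2, 4])        # static mismatch -> warning fires
assert not shapes_differ([-1, 3], [8, 3])   # -1 (unknown dim) suppresses the warning
assert shapes_differ([2, 3], [2, 3, 1])     # rank mismatch -> warning fires
```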