From 912988842ecf6c999383a03f0672c135d4f6b9fb Mon Sep 17 00:00:00 2001
From: xiongkun
Date: Fri, 26 Aug 2022 10:49:00 +0800
Subject: [PATCH] [ Dy2static ] select input fix and while_op memory bug
 fixed. (#45380)

* while support for python containers, which makes it convenient to
  convert more dynamic-graph code into static graphs.

* cond support for python containers

* 1. make select_input's output shape follow inputs[1]
  2. add a warning for the risky assign in while_loop

* fix 2 problems in GPT export:
  1. a bug in while_op no_need_copy_var, which causes a GPU memory leak
  2. a bug in undefined_var, where stop_gradient should be False

* change names per code review

* format
---
 paddle/fluid/operators/controlflow/while_op.cc | 13 +++++++++++++
 .../fluid/dygraph/dygraph_to_static/utils.py   |  6 ++++++
 python/paddle/fluid/layers/control_flow.py     | 18 +++++++++++++---
 3 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/paddle/fluid/operators/controlflow/while_op.cc b/paddle/fluid/operators/controlflow/while_op.cc
index 6ccacddf070..4e0344b3b93 100644
--- a/paddle/fluid/operators/controlflow/while_op.cc
+++ b/paddle/fluid/operators/controlflow/while_op.cc
@@ -86,6 +86,19 @@ class WhileOp : public framework::OperatorBase {
 
     std::set<std::string> no_copy_var_names;
     if (!is_test) {
+      // Set all persistable parameters into no_copy_var_names.
+      auto *global_block = block;
+
+      while (global_block->ID() != 0)
+        global_block = global_block->ParentBlock();
+      auto all_vars = global_block->AllVars();
+      std::for_each(all_vars.begin(),
+                    all_vars.end(),
+                    [&no_copy_var_names](framework::VarDesc *var) {
+                      if (var->IsParameter())
+                        no_copy_var_names.insert(var->Name());
+                    });
+
       const std::vector<framework::OpDesc *> &all_ops = block->AllOps();
       for (const framework::OpDesc *op : all_ops) {
         const framework::VariableNameMap &input_var_names = op->Inputs();
diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/utils.py b/python/paddle/fluid/dygraph/dygraph_to_static/utils.py
index d3db7209c65..09a249ed6b1 100644
--- a/python/paddle/fluid/dygraph/dygraph_to_static/utils.py
+++ b/python/paddle/fluid/dygraph/dygraph_to_static/utils.py
@@ -148,7 +148,12 @@ def create_undefined_var_like(variable):
     from paddle.fluid.dygraph.dygraph_to_static.return_transformer import RETURN_NO_VALUE_MAGIC_NUM
     var = data_layer_not_check(unique_name.generate("undefined_var"),
                                variable.shape, variable.dtype)
+    var.stop_gradient = False
+    helper = LayerHelper('create_undefined_var_like', **locals())
+    saved_block_ids = helper.main_program.current_block_idx
+    helper.main_program.current_block_idx = 0
     assign(RETURN_NO_VALUE_MAGIC_NUM, var)
+    helper.main_program.current_block_idx = saved_block_ids
     return var
 
 
@@ -156,6 +161,7 @@ def create_undefined_variable():
     from paddle.fluid.dygraph.dygraph_to_static.return_transformer import RETURN_NO_VALUE_MAGIC_NUM
     var = data_layer_not_check(unique_name.generate("undefined_var"), [1],
                                "float64")
+    var.stop_gradient = False
     # the variable is created in block(0), we append assign in block(0) either.
     helper = LayerHelper('create_undefined_variable', **locals())
     saved_block_ids = helper.main_program.current_block_idx
diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py
index 43781665dc3..258038fea6a 100755
--- a/python/paddle/fluid/layers/control_flow.py
+++ b/python/paddle/fluid/layers/control_flow.py
@@ -89,9 +89,9 @@ def select_input(inputs, mask):
     check_type(inputs, 'inputs', (list, tuple), 'select_input')
     check_variable_and_dtype(mask, 'mask', ['int32'], 'select_input')
 
-    input_dtype = inputs[0].dtype
-    input_shape = inputs[0].shape
-    input_type = inputs[0].type
+    input_dtype = inputs[1].dtype
+    input_shape = inputs[1].shape
+    input_type = inputs[1].type
 
     out = helper.create_variable(dtype=input_dtype,
                                  shape=input_shape,
@@ -1190,6 +1190,13 @@ def assign_skip_lod_tensor_array(input, output):
     """
     Assign input to output, but skip the process of copying LoDTensorArray unless it's created in while_block.
     """
+
+    def has_shape_diff(x_var, y_var):
+        if len(x_var.shape) != len(y_var.shape): return True
+        for x_dim, y_dim in zip(x_var.shape, y_var.shape):
+            if x_dim != y_dim and -1 not in [x_dim, y_dim]: return True
+        return False
+
     if not isinstance(input, (Variable, core.VarBase)):
         if isinstance(output, Variable) and isinstance(
                 input, support_ret_buildin_type):
@@ -1205,6 +1212,11 @@ def assign_skip_lod_tensor_array(input, output):
         if parent_block and not parent_block._find_var_recursive(input.name):
             assign(input, output)
     else:
+        if isinstance(output, Variable) and isinstance(
+                input, Variable) and has_shape_diff(input, output):
+            warnings.warn(
+                "In dy2static mode, we attempt to assign a variable with shape {} to a variable with shape {}, which is not always correct."
+                .format(input.shape, output.shape))
         assign(input, output)
-- 
GitLab
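
Editor's notes: three illustrative sketches follow. They are not part of
the patch; names like Block, VarDesc, Program, and Shaped are hypothetical
stand-ins, pared down to show the idea behind each hunk.

1) The while_op.cc hunk fixes a GPU memory leak by marking persistable
parameters as "no copy": walk up to the global block (id 0), collect every
parameter's name, and skip snapshotting those variables on each loop
iteration. A minimal sketch of that walk, assuming the hypothetical
Block/VarDesc model below:

    # Hypothetical stand-ins for Paddle's C++ Block/VarDesc, pared down to
    # mirror the while_op fix: collect persistable parameter names from
    # the global block so while_op skips copying them each iteration.
    class VarDesc:
        def __init__(self, name, is_parameter):
            self.name = name
            self.is_parameter = is_parameter

    class Block:
        def __init__(self, block_id, variables=(), parent=None):
            self.id = block_id
            self.variables = list(variables)
            self.parent = parent

    def collect_no_copy_params(block):
        # Walk up to block 0 (the global block), as the C++ code does
        # with ParentBlock(), then take every parameter's name.
        global_block = block
        while global_block.id != 0:
            global_block = global_block.parent
        return {v.name for v in global_block.variables if v.is_parameter}

    g = Block(0, variables=[VarDesc("w", True), VarDesc("tmp", False)])
    body = Block(1, parent=g)  # the while-loop body block
    assert collect_no_copy_params(body) == {"w"}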
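2) The utils.py hunks rely on a block-placement detail: the undefined-var
placeholder is created in block 0, so the assign that initializes it must
also be emitted into block 0, not into whatever sub-block (for example a
while body) is current. A sketch of that save/redirect/restore pattern,
assuming the hypothetical Program model below:

    # Hypothetical, minimal Program model: a list of blocks, each a list
    # of op strings, plus a current-block index.
    class Program:
        def __init__(self):
            self.blocks = [[]]  # block 0 is the global block
            self.current_block_idx = 0

        def append_op(self, op):
            self.blocks[self.current_block_idx].append(op)

    def emit_in_global_block(program, op):
        # The pattern used by create_undefined_variable in this patch:
        # save the current block index, redirect to block 0, emit, restore.
        saved_block_idx = program.current_block_idx
        program.current_block_idx = 0
        try:
            program.append_op(op)
        finally:
            program.current_block_idx = saved_block_idx

    prog = Program()
    prog.blocks.append([])      # block 1: e.g. a while_loop body
    prog.current_block_idx = 1  # we are "inside" the loop body
    emit_in_global_block(prog, "assign(MAGIC_NUM, undefined_var)")
    assert prog.blocks[0] == ["assign(MAGIC_NUM, undefined_var)"]
    assert prog.current_block_idx == 1  # caller's block is restored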
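3) The new warning in assign_skip_lod_tensor_array fires only on a real
conflict: two shapes differ when their ranks differ, or when a pair of
dimensions are both static and unequal (-1 marks a dynamic dimension). A
standalone restatement of that rule, with a hypothetical Shaped stand-in
for a Paddle Variable:

    # -1 marks a dynamic (unknown) dimension; only static, unequal pairs
    # count as a mismatch, matching has_shape_diff in the patch.
    from dataclasses import dataclass
    from typing import Tuple

    @dataclass
    class Shaped:
        shape: Tuple[int, ...]

    def has_shape_diff(x_var, y_var):
        if len(x_var.shape) != len(y_var.shape):
            return True  # rank mismatch is always a shape difference
        for x_dim, y_dim in zip(x_var.shape, y_var.shape):
            if x_dim != y_dim and -1 not in (x_dim, y_dim):
                return True  # both static and unequal: real conflict
        return False

    assert not has_shape_diff(Shaped((-1, 8)), Shaped((4, 8)))  # no warning
    assert has_shape_diff(Shaped((4, 8)), Shaped((4, 16)))      # warns
    assert has_shape_diff(Shaped((4,)), Shaped((4, 1)))         # warns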