From 887a39f05047b3aa9f51e41f64e9c4cbea72d37f Mon Sep 17 00:00:00 2001 From: Zeng Jinle <32832641+sneaxiy@users.noreply.github.com> Date: Fri, 24 May 2019 19:27:58 +0800 Subject: [PATCH] Fix dygraph unique name bug (#17592) * fix unique_name growth bug in dygraph mode,test=develop * change generate_tmp to generate_with_ignorable_key,test=develop --- python/paddle/fluid/layer_helper_base.py | 17 ++++++++-------- python/paddle/fluid/layers/collective.py | 3 ++- python/paddle/fluid/layers/control_flow.py | 15 ++++++++------ python/paddle/fluid/layers/device.py | 2 +- python/paddle/fluid/layers/nn.py | 6 ++++-- python/paddle/fluid/optimizer.py | 6 ++++-- python/paddle/fluid/unique_name.py | 23 ++++++++++++++++++++++ 7 files changed, 52 insertions(+), 20 deletions(-) diff --git a/python/paddle/fluid/layer_helper_base.py b/python/paddle/fluid/layer_helper_base.py index 9eed00b16..cbfd4f45f 100644 --- a/python/paddle/fluid/layer_helper_base.py +++ b/python/paddle/fluid/layer_helper_base.py @@ -85,19 +85,19 @@ class LayerHelperBase(object): block=self.startup_program.global_block()): if out is None: out = block.create_var( - name=unique_name.generate(".".join( + name=unique_name.generate_with_ignorable_key(".".join( [self.name, 'weight_norm_norm'])), dtype=dtype, persistable=False) abs_out = block.create_var( - name=unique_name.generate(".".join( + name=unique_name.generate_with_ignorable_key(".".join( [self.name, 'weight_norm_abs'])), dtype=dtype, persistable=False) block.append_op( type='abs', inputs={'X': x}, outputs={'Out': abs_out}) pow_out = block.create_var( - name=unique_name.generate(".".join( + name=unique_name.generate_with_ignorable_key(".".join( [self.name, 'weight_norm_pow'])), dtype=dtype, persistable=False) @@ -107,7 +107,7 @@ class LayerHelperBase(object): outputs={'Out': pow_out}, attrs={'factor': float(p)}) sum_out = block.create_var( - name=unique_name.generate(".".join( + name=unique_name.generate_with_ignorable_key(".".join( [self.name, 
'weight_norm_sum'])), dtype=dtype, persistable=False) @@ -133,7 +133,7 @@ class LayerHelperBase(object): block=self.startup_program.global_block()): if out is None: out = block.create_var( - name=unique_name.generate(".".join( + name=unique_name.generate_with_ignorable_key(".".join( [self.name, 'weight_norm_reshape'])), dtype=dtype, persistable=False) @@ -150,7 +150,7 @@ class LayerHelperBase(object): block=self.startup_program.global_block()): if out is None: out = block.create_var( - name=unique_name.generate(".".join( + name=unique_name.generate_with_ignorable_key(".".join( [self.name, 'weight_norm_transpose'])), dtype=dtype, persistable=False) @@ -168,7 +168,7 @@ class LayerHelperBase(object): """Computes the norm over all dimensions except dim""" if out is None: out = block.create_var( - name=unique_name.generate(".".join( + name=unique_name.generate_with_ignorable_key(".".join( [self.name, 'weight_norm_norm'])), dtype=dtype, persistable=False) @@ -327,7 +327,8 @@ class LayerHelperBase(object): infer_var_type. 
""" return self.main_program.current_block().create_var( - name=unique_name.generate(".".join([self.name, 'tmp'])), + name=unique_name.generate_with_ignorable_key(".".join( + [self.name, 'tmp'])), dtype=dtype, type=core.VarDesc.VarType.LOD_TENSOR, persistable=False, diff --git a/python/paddle/fluid/layers/collective.py b/python/paddle/fluid/layers/collective.py index 4fa0d1eb2..6beddac7a 100644 --- a/python/paddle/fluid/layers/collective.py +++ b/python/paddle/fluid/layers/collective.py @@ -33,7 +33,8 @@ def _allreduce(x, out=None, reduce_type="sum", sync_mode=False): if out is None: out = helper.create_variable( - name=unique_name.generate(".".join([x.name, 'tmp'])), + name=unique_name.generate_with_ignorable_key(".".join( + [x.name, 'tmp'])), shape=x.shape, dtype=x.dtype, type=x.type, diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py index 5f5a2d105..3ed59cd8e 100644 --- a/python/paddle/fluid/layers/control_flow.py +++ b/python/paddle/fluid/layers/control_flow.py @@ -395,7 +395,7 @@ class StaticRNN(object): raise ValueError( "if init is None, memory at least need shape and batch_ref") parent_block = self._parent_block() - var_name = unique_name.generate("@".join( + var_name = unique_name.generate_with_ignorable_key("@".join( [self.helper.name, "memory_boot"])) boot_var = parent_block.create_var( name=var_name, @@ -418,7 +418,8 @@ class StaticRNN(object): return self.memory(init=boot_var) else: pre_mem = self.helper.create_variable( - name=unique_name.generate("@".join([self.helper.name, "mem"])), + name=unique_name.generate_with_ignorable_key("@".join( + [self.helper.name, "mem"])), dtype=init.dtype, shape=init.shape) self.memories[pre_mem.name] = StaticRNNMemoryLink( @@ -1563,11 +1564,13 @@ class IfElse(object): if id(x) not in self.input_table: parent_block = self._parent_block() out_true = parent_block.create_var( - name=unique_name.generate('ifelse_input' + self.helper.name), + 
name=unique_name.generate_with_ignorable_key('ifelse_input' + + self.helper.name), dtype=x.dtype) out_false = parent_block.create_var( - name=unique_name.generate('ifelse_input' + self.helper.name), + name=unique_name.generate_with_ignorable_key('ifelse_input' + + self.helper.name), dtype=x.dtype) parent_block.append_op( type='split_lod_tensor', @@ -1609,7 +1612,7 @@ class IfElse(object): raise TypeError("Each output should be a variable") # create outside tensor outside_out = parent_block.create_var( - name=unique_name.generate("_".join( + name=unique_name.generate_with_ignorable_key("_".join( [self.helper.name, 'output'])), dtype=each_out.dtype) out_table.append(outside_out) @@ -2027,7 +2030,7 @@ class DynamicRNN(object): parent_block = self._parent_block_() for each in outputs: outside_array = parent_block.create_var( - name=unique_name.generate("_".join( + name=unique_name.generate_with_ignorable_key("_".join( [self.helper.name, "output_array", each.name])), type=core.VarDesc.VarType.LOD_TENSOR_ARRAY, dtype=each.dtype) diff --git a/python/paddle/fluid/layers/device.py b/python/paddle/fluid/layers/device.py index 43ebd160d..78226a520 100644 --- a/python/paddle/fluid/layers/device.py +++ b/python/paddle/fluid/layers/device.py @@ -30,7 +30,7 @@ __all__ = [] def get_places(device_count=None, device_type=None): helper = LayerHelper('get_places', **locals()) out_places = helper.create_variable( - name=unique_name.generate(helper.name + ".out")) + name=unique_name.generate_with_ignorable_key(helper.name + ".out")) attrs = dict() if device_count is not None: attrs['device_count'] = int(device_count) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index e49efd378..cc64985ba 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -9844,7 +9844,8 @@ def clip(x, min, max, name=None): helper = LayerHelper("clip", **locals()) if name is None: - name = unique_name.generate(".".join([helper.name, 'tmp'])) + name 
= unique_name.generate_with_ignorable_key(".".join( + [helper.name, 'tmp'])) out = helper.create_variable( type=x.type, name=name, dtype=x.dtype, persistable=False) @@ -9883,7 +9884,8 @@ def clip_by_norm(x, max_norm, name=None): helper = LayerHelper("clip_by_norm", **locals()) if name is None: - name = unique_name.generate(".".join([helper.name, 'tmp'])) + name = unique_name.generate_with_ignorable_key(".".join( + [helper.name, 'tmp'])) out = helper.create_variable( type=x.type, name=name, dtype=x.dtype, persistable=False) diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index 24bb5a5b3..107c435ea 100644 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -839,7 +839,8 @@ class DGCMomentumOptimizer(MomentumOptimizer): helper = LayerHelper("dgc_clip_by_norm_op", **args) if name is None: - name = unique_name.generate(".".join([helper.name, 'tmp'])) + name = unique_name.generate_with_ignorable_key(".".join( + [helper.name, 'tmp'])) out = helper.create_variable( type=x.type, name=name, dtype=x.dtype, persistable=False) @@ -2057,7 +2058,8 @@ class ModelAverage(Optimizer): ).all_parameters(): if param.do_model_average != False: grad = param.block.create_var( - name=unique_name.generate(".".join([param.name, 'tmp'])), + name=unique_name.generate_with_ignorable_key(".".join( + [param.name, 'tmp'])), dtype=param.dtype, persistable=False, stop_gradient=True) diff --git a/python/paddle/fluid/unique_name.py b/python/paddle/fluid/unique_name.py index 324257c13..36334edbf 100644 --- a/python/paddle/fluid/unique_name.py +++ b/python/paddle/fluid/unique_name.py @@ -58,6 +58,29 @@ def generate(key): return generator(key) +# FIXME(zjl): The previous naming rule in static graph would +# cause a memory leak in dygraph mode. It is because the previous +# naming rule would use `conv_0.tmp` as the key, and in dygraph +# mode, `conv_i` increases as batch increases. 
Thus, keys would +# increase in a way like `conv_0.tmp`, `conv_1.tmp`, .... +# No better way to fix this bug in dygraph mode has been found. In TF, +# variable names are meaningless in eager execution mode, and in +# PyTorch, there are no variable names at all. Maybe we should +# discard variable names in dygraph mode. +# +# Another concern is saving/loading inference models. Usually, users +# save a model in static graph mode and load it in dygraph +# mode. Therefore, we currently keep the variable names of Parameters. +# +# Please fix me if a better method is found. +def generate_with_ignorable_key(key): + from .framework import in_dygraph_mode + if in_dygraph_mode(): + key = "tmp" + + return generator(key) + + def switch(new_generator=None): global generator old = generator -- GitLab