未验证 提交 887a39f0 编写于 作者: Z Zeng Jinle 提交者: GitHub

Fix dygraph unique name bug (#17592)

* fix unique_name growth bug in dygraph mode,test=develop

* change generate_tmp to generate_with_ignorable_key,test=develop
上级 e9216d06
...@@ -85,19 +85,19 @@ class LayerHelperBase(object): ...@@ -85,19 +85,19 @@ class LayerHelperBase(object):
block=self.startup_program.global_block()): block=self.startup_program.global_block()):
if out is None: if out is None:
out = block.create_var( out = block.create_var(
name=unique_name.generate(".".join( name=unique_name.generate_with_ignorable_key(".".join(
[self.name, 'weight_norm_norm'])), [self.name, 'weight_norm_norm'])),
dtype=dtype, dtype=dtype,
persistable=False) persistable=False)
abs_out = block.create_var( abs_out = block.create_var(
name=unique_name.generate(".".join( name=unique_name.generate_with_ignorable_key(".".join(
[self.name, 'weight_norm_abs'])), [self.name, 'weight_norm_abs'])),
dtype=dtype, dtype=dtype,
persistable=False) persistable=False)
block.append_op( block.append_op(
type='abs', inputs={'X': x}, outputs={'Out': abs_out}) type='abs', inputs={'X': x}, outputs={'Out': abs_out})
pow_out = block.create_var( pow_out = block.create_var(
name=unique_name.generate(".".join( name=unique_name.generate_with_ignorable_key(".".join(
[self.name, 'weight_norm_pow'])), [self.name, 'weight_norm_pow'])),
dtype=dtype, dtype=dtype,
persistable=False) persistable=False)
...@@ -107,7 +107,7 @@ class LayerHelperBase(object): ...@@ -107,7 +107,7 @@ class LayerHelperBase(object):
outputs={'Out': pow_out}, outputs={'Out': pow_out},
attrs={'factor': float(p)}) attrs={'factor': float(p)})
sum_out = block.create_var( sum_out = block.create_var(
name=unique_name.generate(".".join( name=unique_name.generate_with_ignorable_key(".".join(
[self.name, 'weight_norm_sum'])), [self.name, 'weight_norm_sum'])),
dtype=dtype, dtype=dtype,
persistable=False) persistable=False)
...@@ -133,7 +133,7 @@ class LayerHelperBase(object): ...@@ -133,7 +133,7 @@ class LayerHelperBase(object):
block=self.startup_program.global_block()): block=self.startup_program.global_block()):
if out is None: if out is None:
out = block.create_var( out = block.create_var(
name=unique_name.generate(".".join( name=unique_name.generate_with_ignorable_key(".".join(
[self.name, 'weight_norm_reshape'])), [self.name, 'weight_norm_reshape'])),
dtype=dtype, dtype=dtype,
persistable=False) persistable=False)
...@@ -150,7 +150,7 @@ class LayerHelperBase(object): ...@@ -150,7 +150,7 @@ class LayerHelperBase(object):
block=self.startup_program.global_block()): block=self.startup_program.global_block()):
if out is None: if out is None:
out = block.create_var( out = block.create_var(
name=unique_name.generate(".".join( name=unique_name.generate_with_ignorable_key(".".join(
[self.name, 'weight_norm_transpose'])), [self.name, 'weight_norm_transpose'])),
dtype=dtype, dtype=dtype,
persistable=False) persistable=False)
...@@ -168,7 +168,7 @@ class LayerHelperBase(object): ...@@ -168,7 +168,7 @@ class LayerHelperBase(object):
"""Computes the norm over all dimensions except dim""" """Computes the norm over all dimensions except dim"""
if out is None: if out is None:
out = block.create_var( out = block.create_var(
name=unique_name.generate(".".join( name=unique_name.generate_with_ignorable_key(".".join(
[self.name, 'weight_norm_norm'])), [self.name, 'weight_norm_norm'])),
dtype=dtype, dtype=dtype,
persistable=False) persistable=False)
...@@ -327,7 +327,8 @@ class LayerHelperBase(object): ...@@ -327,7 +327,8 @@ class LayerHelperBase(object):
infer_var_type. infer_var_type.
""" """
return self.main_program.current_block().create_var( return self.main_program.current_block().create_var(
name=unique_name.generate(".".join([self.name, 'tmp'])), name=unique_name.generate_with_ignorable_key(".".join(
[self.name, 'tmp'])),
dtype=dtype, dtype=dtype,
type=core.VarDesc.VarType.LOD_TENSOR, type=core.VarDesc.VarType.LOD_TENSOR,
persistable=False, persistable=False,
......
...@@ -33,7 +33,8 @@ def _allreduce(x, out=None, reduce_type="sum", sync_mode=False): ...@@ -33,7 +33,8 @@ def _allreduce(x, out=None, reduce_type="sum", sync_mode=False):
if out is None: if out is None:
out = helper.create_variable( out = helper.create_variable(
name=unique_name.generate(".".join([x.name, 'tmp'])), name=unique_name.generate_with_ignorable_key(".".join(
[x.name, 'tmp'])),
shape=x.shape, shape=x.shape,
dtype=x.dtype, dtype=x.dtype,
type=x.type, type=x.type,
......
...@@ -395,7 +395,7 @@ class StaticRNN(object): ...@@ -395,7 +395,7 @@ class StaticRNN(object):
raise ValueError( raise ValueError(
"if init is None, memory at least need shape and batch_ref") "if init is None, memory at least need shape and batch_ref")
parent_block = self._parent_block() parent_block = self._parent_block()
var_name = unique_name.generate("@".join( var_name = unique_name.generate_with_ignorable_key("@".join(
[self.helper.name, "memory_boot"])) [self.helper.name, "memory_boot"]))
boot_var = parent_block.create_var( boot_var = parent_block.create_var(
name=var_name, name=var_name,
...@@ -418,7 +418,8 @@ class StaticRNN(object): ...@@ -418,7 +418,8 @@ class StaticRNN(object):
return self.memory(init=boot_var) return self.memory(init=boot_var)
else: else:
pre_mem = self.helper.create_variable( pre_mem = self.helper.create_variable(
name=unique_name.generate("@".join([self.helper.name, "mem"])), name=unique_name.generate_with_ignorable_key("@".join(
[self.helper.name, "mem"])),
dtype=init.dtype, dtype=init.dtype,
shape=init.shape) shape=init.shape)
self.memories[pre_mem.name] = StaticRNNMemoryLink( self.memories[pre_mem.name] = StaticRNNMemoryLink(
...@@ -1563,11 +1564,13 @@ class IfElse(object): ...@@ -1563,11 +1564,13 @@ class IfElse(object):
if id(x) not in self.input_table: if id(x) not in self.input_table:
parent_block = self._parent_block() parent_block = self._parent_block()
out_true = parent_block.create_var( out_true = parent_block.create_var(
name=unique_name.generate('ifelse_input' + self.helper.name), name=unique_name.generate_with_ignorable_key('ifelse_input' +
self.helper.name),
dtype=x.dtype) dtype=x.dtype)
out_false = parent_block.create_var( out_false = parent_block.create_var(
name=unique_name.generate('ifelse_input' + self.helper.name), name=unique_name.generate_with_ignorable_key('ifelse_input' +
self.helper.name),
dtype=x.dtype) dtype=x.dtype)
parent_block.append_op( parent_block.append_op(
type='split_lod_tensor', type='split_lod_tensor',
...@@ -1609,7 +1612,7 @@ class IfElse(object): ...@@ -1609,7 +1612,7 @@ class IfElse(object):
raise TypeError("Each output should be a variable") raise TypeError("Each output should be a variable")
# create outside tensor # create outside tensor
outside_out = parent_block.create_var( outside_out = parent_block.create_var(
name=unique_name.generate("_".join( name=unique_name.generate_with_ignorable_key("_".join(
[self.helper.name, 'output'])), [self.helper.name, 'output'])),
dtype=each_out.dtype) dtype=each_out.dtype)
out_table.append(outside_out) out_table.append(outside_out)
...@@ -2027,7 +2030,7 @@ class DynamicRNN(object): ...@@ -2027,7 +2030,7 @@ class DynamicRNN(object):
parent_block = self._parent_block_() parent_block = self._parent_block_()
for each in outputs: for each in outputs:
outside_array = parent_block.create_var( outside_array = parent_block.create_var(
name=unique_name.generate("_".join( name=unique_name.generate_with_ignorable_key("_".join(
[self.helper.name, "output_array", each.name])), [self.helper.name, "output_array", each.name])),
type=core.VarDesc.VarType.LOD_TENSOR_ARRAY, type=core.VarDesc.VarType.LOD_TENSOR_ARRAY,
dtype=each.dtype) dtype=each.dtype)
......
...@@ -30,7 +30,7 @@ __all__ = [] ...@@ -30,7 +30,7 @@ __all__ = []
def get_places(device_count=None, device_type=None): def get_places(device_count=None, device_type=None):
helper = LayerHelper('get_places', **locals()) helper = LayerHelper('get_places', **locals())
out_places = helper.create_variable( out_places = helper.create_variable(
name=unique_name.generate(helper.name + ".out")) name=unique_name.generate_with_ignorable_key(helper.name + ".out"))
attrs = dict() attrs = dict()
if device_count is not None: if device_count is not None:
attrs['device_count'] = int(device_count) attrs['device_count'] = int(device_count)
......
...@@ -9844,7 +9844,8 @@ def clip(x, min, max, name=None): ...@@ -9844,7 +9844,8 @@ def clip(x, min, max, name=None):
helper = LayerHelper("clip", **locals()) helper = LayerHelper("clip", **locals())
if name is None: if name is None:
name = unique_name.generate(".".join([helper.name, 'tmp'])) name = unique_name.generate_with_ignorable_key(".".join(
[helper.name, 'tmp']))
out = helper.create_variable( out = helper.create_variable(
type=x.type, name=name, dtype=x.dtype, persistable=False) type=x.type, name=name, dtype=x.dtype, persistable=False)
...@@ -9883,7 +9884,8 @@ def clip_by_norm(x, max_norm, name=None): ...@@ -9883,7 +9884,8 @@ def clip_by_norm(x, max_norm, name=None):
helper = LayerHelper("clip_by_norm", **locals()) helper = LayerHelper("clip_by_norm", **locals())
if name is None: if name is None:
name = unique_name.generate(".".join([helper.name, 'tmp'])) name = unique_name.generate_with_ignorable_key(".".join(
[helper.name, 'tmp']))
out = helper.create_variable( out = helper.create_variable(
type=x.type, name=name, dtype=x.dtype, persistable=False) type=x.type, name=name, dtype=x.dtype, persistable=False)
......
...@@ -839,7 +839,8 @@ class DGCMomentumOptimizer(MomentumOptimizer): ...@@ -839,7 +839,8 @@ class DGCMomentumOptimizer(MomentumOptimizer):
helper = LayerHelper("dgc_clip_by_norm_op", **args) helper = LayerHelper("dgc_clip_by_norm_op", **args)
if name is None: if name is None:
name = unique_name.generate(".".join([helper.name, 'tmp'])) name = unique_name.generate_with_ignorable_key(".".join(
[helper.name, 'tmp']))
out = helper.create_variable( out = helper.create_variable(
type=x.type, name=name, dtype=x.dtype, persistable=False) type=x.type, name=name, dtype=x.dtype, persistable=False)
...@@ -2057,7 +2058,8 @@ class ModelAverage(Optimizer): ...@@ -2057,7 +2058,8 @@ class ModelAverage(Optimizer):
).all_parameters(): ).all_parameters():
if param.do_model_average != False: if param.do_model_average != False:
grad = param.block.create_var( grad = param.block.create_var(
name=unique_name.generate(".".join([param.name, 'tmp'])), name=unique_name.generate_with_ignorable_key(".".join(
[param.name, 'tmp'])),
dtype=param.dtype, dtype=param.dtype,
persistable=False, persistable=False,
stop_gradient=True) stop_gradient=True)
......
...@@ -58,6 +58,29 @@ def generate(key): ...@@ -58,6 +58,29 @@ def generate(key):
return generator(key) return generator(key)
# FIXME(zjl): The previous naming rule in static graph mode would
# cause a memory leak in dygraph mode. This is because the previous
# naming rule would use `conv_0.tmp` as the key, and in dygraph
# mode, `conv_i` increases as the number of batches increases. Thus,
# the keys would keep growing, like `conv_0.tmp`, `conv_1.tmp`, ....
# We have not found a better way to fix this bug in dygraph mode. In
# TF, variable names are meaningless in eager execution mode, and in
# PyTorch, there are no variable names at all. Maybe we should
# discard variable names in dygraph mode.
#
# Another concern is saving/loading inference models. Usually, users
# save a model in static graph mode and load it in dygraph mode.
# Therefore, we currently keep the variable names of Parameters.
#
# Please fix me if a better method is found.
def generate_with_ignorable_key(key):
    """Generate a name from ``key``, ignoring ``key`` in dygraph mode.

    In dygraph mode the caller-supplied ``key`` is replaced by the fixed
    string ``"tmp"`` before it is handed to the module-level ``generator``,
    so every call shares one key instead of introducing a new key per call.
    Outside dygraph mode ``key`` is passed through unchanged.

    Args:
        key: The prefix the caller would like the generated name to use.

    Returns:
        Whatever the current module-level ``generator`` returns for the
        effective key.
    """
    # Imported here rather than at module top level, as in the original;
    # presumably to avoid a circular import with `.framework` -- TODO confirm.
    from .framework import in_dygraph_mode

    effective_key = "tmp" if in_dygraph_mode() else key
    return generator(effective_key)
def switch(new_generator=None): def switch(new_generator=None):
global generator global generator
old = generator old = generator
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册