Unverified commit 887a39f0 authored by Zeng Jinle, committed by GitHub

Fix dygraph unique name bug (#17592)

* fix unique_name growth bug in dygraph mode, test=develop

* change generate_tmp to generate_with_ignorable_key, test=develop
Parent commit: e9216d06
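The bug this commit fixes can be illustrated with a small stand-alone sketch (illustrative only; `toy_generate` and `counters` are hypothetical stand-ins for Paddle's internal unique-name generator, not its actual code). The old naming rule bakes the per-layer index into the key, so in dygraph mode the set of keys, and the memory that backs it, grows with every batch, whereas an ignorable key collapses everything onto a single counter:

```python
from collections import defaultdict

# Toy stand-in for the unique-name generator: each distinct key gets its own
# monotonically increasing counter, so every new key adds a dict entry that
# is never released.
counters = defaultdict(int)

def toy_generate(key):
    idx = counters[key]
    counters[key] += 1
    return "%s_%d" % (key, idx)

# Static-graph-style keys bake the layer index into the name. In dygraph mode
# the layer counter keeps increasing across batches, so the key set grows
# without bound: conv_0.tmp, conv_1.tmp, ...
for batch_id in range(3):
    toy_generate("conv_%d.tmp" % batch_id)
print(len(counters))  # 3, and it keeps growing with more batches

# With an "ignorable" key the name collapses to "tmp" in dygraph mode, so a
# single counter serves every temporary variable and memory stays bounded.
counters.clear()
for batch_id in range(3):
    toy_generate("tmp")
print(len(counters))  # 1, regardless of how many batches run
```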
......@@ -85,19 +85,19 @@ class LayerHelperBase(object):
block=self.startup_program.global_block()):
if out is None:
out = block.create_var(
-name=unique_name.generate(".".join(
+name=unique_name.generate_with_ignorable_key(".".join(
[self.name, 'weight_norm_norm'])),
dtype=dtype,
persistable=False)
abs_out = block.create_var(
-name=unique_name.generate(".".join(
+name=unique_name.generate_with_ignorable_key(".".join(
[self.name, 'weight_norm_abs'])),
dtype=dtype,
persistable=False)
block.append_op(
type='abs', inputs={'X': x}, outputs={'Out': abs_out})
pow_out = block.create_var(
-name=unique_name.generate(".".join(
+name=unique_name.generate_with_ignorable_key(".".join(
[self.name, 'weight_norm_pow'])),
dtype=dtype,
persistable=False)
......@@ -107,7 +107,7 @@ class LayerHelperBase(object):
outputs={'Out': pow_out},
attrs={'factor': float(p)})
sum_out = block.create_var(
-name=unique_name.generate(".".join(
+name=unique_name.generate_with_ignorable_key(".".join(
[self.name, 'weight_norm_sum'])),
dtype=dtype,
persistable=False)
......@@ -133,7 +133,7 @@ class LayerHelperBase(object):
block=self.startup_program.global_block()):
if out is None:
out = block.create_var(
-name=unique_name.generate(".".join(
+name=unique_name.generate_with_ignorable_key(".".join(
[self.name, 'weight_norm_reshape'])),
dtype=dtype,
persistable=False)
......@@ -150,7 +150,7 @@ class LayerHelperBase(object):
block=self.startup_program.global_block()):
if out is None:
out = block.create_var(
-name=unique_name.generate(".".join(
+name=unique_name.generate_with_ignorable_key(".".join(
[self.name, 'weight_norm_transpose'])),
dtype=dtype,
persistable=False)
......@@ -168,7 +168,7 @@ class LayerHelperBase(object):
"""Computes the norm over all dimensions except dim"""
if out is None:
out = block.create_var(
-name=unique_name.generate(".".join(
+name=unique_name.generate_with_ignorable_key(".".join(
[self.name, 'weight_norm_norm'])),
dtype=dtype,
persistable=False)
......@@ -327,7 +327,8 @@ class LayerHelperBase(object):
infer_var_type.
"""
return self.main_program.current_block().create_var(
-name=unique_name.generate(".".join([self.name, 'tmp'])),
+name=unique_name.generate_with_ignorable_key(".".join(
+    [self.name, 'tmp'])),
dtype=dtype,
type=core.VarDesc.VarType.LOD_TENSOR,
persistable=False,
......
......@@ -33,7 +33,8 @@ def _allreduce(x, out=None, reduce_type="sum", sync_mode=False):
if out is None:
out = helper.create_variable(
-name=unique_name.generate(".".join([x.name, 'tmp'])),
+name=unique_name.generate_with_ignorable_key(".".join(
+    [x.name, 'tmp'])),
shape=x.shape,
dtype=x.dtype,
type=x.type,
......
......@@ -395,7 +395,7 @@ class StaticRNN(object):
raise ValueError(
"if init is None, memory at least need shape and batch_ref")
parent_block = self._parent_block()
-var_name = unique_name.generate("@".join(
+var_name = unique_name.generate_with_ignorable_key("@".join(
[self.helper.name, "memory_boot"]))
boot_var = parent_block.create_var(
name=var_name,
......@@ -418,7 +418,8 @@ class StaticRNN(object):
return self.memory(init=boot_var)
else:
pre_mem = self.helper.create_variable(
-name=unique_name.generate("@".join([self.helper.name, "mem"])),
+name=unique_name.generate_with_ignorable_key("@".join(
+    [self.helper.name, "mem"])),
dtype=init.dtype,
shape=init.shape)
self.memories[pre_mem.name] = StaticRNNMemoryLink(
......@@ -1563,11 +1564,13 @@ class IfElse(object):
if id(x) not in self.input_table:
parent_block = self._parent_block()
out_true = parent_block.create_var(
-name=unique_name.generate('ifelse_input' + self.helper.name),
+name=unique_name.generate_with_ignorable_key('ifelse_input' +
+                                              self.helper.name),
dtype=x.dtype)
out_false = parent_block.create_var(
-name=unique_name.generate('ifelse_input' + self.helper.name),
+name=unique_name.generate_with_ignorable_key('ifelse_input' +
+                                              self.helper.name),
dtype=x.dtype)
parent_block.append_op(
type='split_lod_tensor',
......@@ -1609,7 +1612,7 @@ class IfElse(object):
raise TypeError("Each output should be a variable")
# create outside tensor
outside_out = parent_block.create_var(
-name=unique_name.generate("_".join(
+name=unique_name.generate_with_ignorable_key("_".join(
[self.helper.name, 'output'])),
dtype=each_out.dtype)
out_table.append(outside_out)
......@@ -2027,7 +2030,7 @@ class DynamicRNN(object):
parent_block = self._parent_block_()
for each in outputs:
outside_array = parent_block.create_var(
-name=unique_name.generate("_".join(
+name=unique_name.generate_with_ignorable_key("_".join(
[self.helper.name, "output_array", each.name])),
type=core.VarDesc.VarType.LOD_TENSOR_ARRAY,
dtype=each.dtype)
......
......@@ -30,7 +30,7 @@ __all__ = []
def get_places(device_count=None, device_type=None):
helper = LayerHelper('get_places', **locals())
out_places = helper.create_variable(
-name=unique_name.generate(helper.name + ".out"))
+name=unique_name.generate_with_ignorable_key(helper.name + ".out"))
attrs = dict()
if device_count is not None:
attrs['device_count'] = int(device_count)
......
......@@ -9844,7 +9844,8 @@ def clip(x, min, max, name=None):
helper = LayerHelper("clip", **locals())
if name is None:
-name = unique_name.generate(".".join([helper.name, 'tmp']))
+name = unique_name.generate_with_ignorable_key(".".join(
+    [helper.name, 'tmp']))
out = helper.create_variable(
type=x.type, name=name, dtype=x.dtype, persistable=False)
......@@ -9883,7 +9884,8 @@ def clip_by_norm(x, max_norm, name=None):
helper = LayerHelper("clip_by_norm", **locals())
if name is None:
-name = unique_name.generate(".".join([helper.name, 'tmp']))
+name = unique_name.generate_with_ignorable_key(".".join(
+    [helper.name, 'tmp']))
out = helper.create_variable(
type=x.type, name=name, dtype=x.dtype, persistable=False)
......
......@@ -839,7 +839,8 @@ class DGCMomentumOptimizer(MomentumOptimizer):
helper = LayerHelper("dgc_clip_by_norm_op", **args)
if name is None:
-name = unique_name.generate(".".join([helper.name, 'tmp']))
+name = unique_name.generate_with_ignorable_key(".".join(
+    [helper.name, 'tmp']))
out = helper.create_variable(
type=x.type, name=name, dtype=x.dtype, persistable=False)
......@@ -2057,7 +2058,8 @@ class ModelAverage(Optimizer):
).all_parameters():
if param.do_model_average != False:
grad = param.block.create_var(
-name=unique_name.generate(".".join([param.name, 'tmp'])),
+name=unique_name.generate_with_ignorable_key(".".join(
+    [param.name, 'tmp'])),
dtype=param.dtype,
persistable=False,
stop_gradient=True)
......
......@@ -58,6 +58,29 @@ def generate(key):
return generator(key)
+
+
+# FIXME(zjl): The previous naming rule in static graph mode would
+# cause a memory leak in dygraph mode, because the previous
+# naming rule uses keys such as `conv_0.tmp`, and in dygraph
+# mode `conv_i` increases as the number of batches increases. Thus,
+# the set of keys grows without bound: `conv_0.tmp`, `conv_1.tmp`, ....
+# We have not found a better way to fix this bug in dygraph mode. In TF,
+# variable names are meaningless in eager execution mode, and in
+# PyTorch, there are no variable names at all. Maybe we should
+# discard variable names in dygraph mode.
+#
+# Another concern is inference model save/load. Usually, a user
+# would save a model in static graph mode and load it in dygraph
+# mode. Therefore, we currently keep the variable names of Parameters.
+#
+# Please fix me if a better method is found.
+def generate_with_ignorable_key(key):
+    from .framework import in_dygraph_mode
+    if in_dygraph_mode():
+        key = "tmp"
+    return generator(key)
+
def switch(new_generator=None):
global generator
old = generator
......
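For reference, a minimal usage sketch of the API introduced above (a hedged example assuming the Paddle 1.5-era `fluid.dygraph.guard()` context and the generator's default empty name prefix; the printed names are indicative, not guaranteed):

```python
import paddle.fluid as fluid
from paddle.fluid import unique_name

# Outside dygraph mode the key is kept, so temporaries remain
# distinguishable by their prefix.
print(unique_name.generate_with_ignorable_key("fc_0.tmp"))  # e.g. fc_0.tmp_0

# Under dygraph the key is replaced by "tmp", so only one counter grows no
# matter how many batches create temporary variables.
with fluid.dygraph.guard():
    print(unique_name.generate_with_ignorable_key("fc_0.tmp"))  # e.g. tmp_0
    print(unique_name.generate_with_ignorable_key("fc_1.tmp"))  # e.g. tmp_1

# Parameter names still go through generate(), so models saved in static
# graph mode keep their parameter names when loaded in dygraph mode.
print(unique_name.generate("fc_0.w"))  # e.g. fc_0.w_0
```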