Commit 24b00ac6 authored by Yu Yang and committed by GitHub

Merge pull request #1313 from emailweixu/memory.set_input

Make it possible to postpone setting the layer name for a memory.
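For context, a minimal sketch of the usage this change enables, pieced together from the docstring and test added below (layer names and sizes are illustrative): a memory can now be created before the layer it remembers exists, and the layer is attached afterwards with set_input().

    from paddle.trainer_config_helpers import *

    seq = data_layer(name="seq_input", size=100)

    def step(s):
        # Create the memory first; the layer it remembers is not known yet.
        mem = memory(name=None, size=200)
        # Build the recurrent state from the current input and the memory.
        out = fc_layer(input=[s, mem], size=200)
        # Postponed step: tell the memory which layer's previous-step output it holds.
        mem.set_input(out)
        return out

    outputs(last_seq(input=recurrent_group(step=step, input=seq)))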
@@ -2222,7 +2222,10 @@ def Link(
 # memory for recurrent layer group.
 # *name* and *size* are actual layer's name and size.
-# will return name of the memory,
+# If *name* is None, need to provide *memory_name* and need to use
+# SetMemoryInput() later to specify the layer which this memory remembers.
+#
+# return the name of the memory,
 # use this name if you assign the memory as other layer's input
 #
 # boot frame of memory is zeroed by default,
@@ -2234,15 +2237,18 @@ def Link(
 # can only be initailized by a *boot_layer* which is a sequence.
 #
 @config_func
-def Memory(
-        name,
-        size,
-        is_sequence=False,
-        boot_layer=None,
-        boot_bias=False,
-        boot_bias_active_type="",
-        boot_with_const_id=None, ):
-    agent_name = name + "+delay1"
+def Memory(name,
+           size,
+           is_sequence=False,
+           boot_layer=None,
+           boot_bias=False,
+           boot_bias_active_type="",
+           boot_with_const_id=None,
+           memory_name=None):
+    if not memory_name:
+        config_assert(name is not None, "name cannot be None")
+        memory_name = name + "+delay1"
+    agent_name = memory_name
     if is_sequence:
         agent_layer = SequenceAgentLayer(agent_name, size)
     else:
@@ -2250,7 +2256,8 @@ def Memory(
     config_assert(g_current_submodel.is_recurrent_layer_group,
                   'Memory should be used in recurrent layer group only')
     memory = g_current_submodel.memories.add()
-    memory.layer_name = MakeLayerNameInSubmodel(name)
+    if name is not None:
+        memory.layer_name = MakeLayerNameInSubmodel(name)
     memory.link_name = MakeLayerNameInSubmodel(agent_name)
     memory.is_sequence = is_sequence
     options = sum((boot_layer is not None, bool(boot_bias),
@@ -2274,6 +2281,17 @@ def Memory(
     return agent_name


+@config_func
+def SetMemoryInput(memory_name, layer_name):
+    memory_name = MakeLayerNameInSubmodel(memory_name)
+    layer_name = MakeLayerNameInSubmodel(layer_name)
+    for mem in g_current_submodel.memories:
+        if mem.link_name == memory_name:
+            mem.layer_name = layer_name
+            return
+    logger.fatal("Nonexistent memory name: " + memory_name)
+
+
 # Generator for recurrent layer group, to use it:
 # 1. define a id layer as output of layer group
 # 2. define a memory of this id layer, and assign a boot id(begin of sequence)
...
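At the config_parser level, the comment above describes the new two-step protocol: when name is None, create the memory with a memory_name, then bind it later with SetMemoryInput(). A minimal sketch with illustrative layer names (these config functions are normally driven by the helpers in layers.py rather than called directly):

    # Sketch only: inside a recurrent layer group, assuming a hypothetical
    # layer "rnn_out" of size 200 is defined elsewhere in the group.
    agent = Memory(name=None, size=200, memory_name="rnn_state")
    # ... layers that consume `agent` (the memory's agent layer) go here ...
    # Bind the memory to the layer whose previous-step output it should hold.
    SetMemoryInput("rnn_state", "rnn_out")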
@@ -97,13 +97,13 @@ def reset_hook():
 register_parse_config_hook(reset_hook)


-def wrap_name_default(name_prefix=None):
+def wrap_name_default(name_prefix=None, name_param="name"):
     """
     Decorator to set "name" arguments default to "{name_prefix}_{invoke_count}".

     .. code:: python

-        @default_name("some_name")
+        @wrap_name_default("some_name")
         def func(name=None):
             print name  # name will never be None. If name is not set,
                         # name will be "some_name_%d"
@@ -115,7 +115,7 @@ def wrap_name_default(name_prefix=None):
     """
     factory = DefaultNameFactory(name_prefix)
     _name_factories.append(factory)
-    return wrap_param_default(["name"], factory)
+    return wrap_param_default([name_param], factory)


 def wrap_param_attr_default(param_names=None, default_factory=None):
...
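The new name_param argument generalizes the decorator: the auto-generated default can be assigned to a parameter other than name, which is what lets memory() below receive a default memory_name. A small sketch with a hypothetical function (the generated string follows DefaultNameFactory's usual __<prefix>_<n>__ pattern):

    # Hypothetical function, for illustration only.
    @wrap_name_default("memory", "memory_name")
    def make_memory(name=None, memory_name=None):
        return name, memory_name

    # memory_name is filled in automatically when not supplied,
    # e.g. (None, '__memory_0__'); an explicit value is left untouched.
    print make_memory()
    print make_memory(memory_name="state")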
@@ -288,6 +288,14 @@ class LayerOutput(object):
         """
         assert False, "this method should not be invoked"

+    def set_input(self, input):
+        """
+        Set the input for a memory layer. Can only be used for memory layer
+        """
+        assert isinstance(input, LayerOutput)
+        assert self.layer_type == LayerType.MEMORY
+        SetMemoryInput(self.name, input.name)
+

 ERROR_CLIPPING = 'error_clipping_threshold'
 DROPOUT = 'drop_rate'
@@ -2759,8 +2767,10 @@ def seq_concat_layer(a, b, act=None, name=None, layer_attr=None,
         size=a.size)


+@wrap_name_default("memory", "memory_name")
 def memory(name,
            size,
+           memory_name=None,
            is_seq=False,
            boot_layer=None,
            boot_bias=None,
@@ -2782,14 +2792,32 @@ def memory(name,
     If boot_layer is not null, the memory is just the boot_layer's output.
     Set :code:`is_seq` is true boot layer is sequence.

     The same name layer in recurrent group will set memory on each time
     step.

-    :param name: memory's name.
+    .. code-block:: python
+
+        mem = memory(size=256, name='state')
+        state = fc_layer(input=mem, size=256, name='state')
+
+    If you do not want to specify the name, you can equivalently use set_input()
+    to specify the layer to be remembered, as follows:
+
+    .. code-block:: python
+
+        mem = memory(size=256)
+        state = fc_layer(input=mem, size=256)
+        mem.set_input(state)
+
+    :param name: the name of the layer which this memory remembers.
+                 If name is None, user should call set_input() to specify the
+                 name of the layer which this memory remembers.
     :type name: basestring
     :param size: size of memory.
     :type size: int
+    :param memory_name: the name of the memory.
+                        It is ignored when name is provided.
+    :type memory_name: basestring
     :param is_seq: is sequence for boot_layer
     :type is_seq: bool
     :param boot_layer: boot layer of memory.
@@ -2811,13 +2839,21 @@ def memory(name,
         boot_bias = ParamAttr.to_bias(boot_bias)

     assert boot_layer is None or isinstance(boot_layer, LayerOutput)

-    agent_name = Memory(name, size, is_seq, boot_layer.name
-                        if boot_layer is not None else None, boot_bias,
-                        boot_bias_active_type.name, boot_with_const_id)
+    if name is not None:
+        memory_name = None
+    memory_name = Memory(
+        name,
+        size,
+        is_sequence=is_seq,
+        boot_layer=boot_layer.name if boot_layer is not None else None,
+        boot_bias=boot_bias,
+        boot_bias_active_type=boot_bias_active_type.name,
+        boot_with_const_id=boot_with_const_id,
+        memory_name=memory_name)

     lout = LayerOutput(
-        name=agent_name,
+        name=memory_name,
         size=size,
         layer_type=LayerType.MEMORY,
         parents=[boot_layer] if boot_layer is not None else None)
@@ -3565,7 +3601,7 @@ def __cost_input__(input, label, weight=None):
     ipts = [Input(input.name), Input(label.name)]
     parents = [input, label]
     if weight is not None:
-        assert weight.layer_type == LayerType.DATA
+        assert weight.size == 1
         ipts.append(Input(weight.name))
         parents.append(weight)
     return ipts, parents
@@ -4946,7 +4982,12 @@ def lambda_cost(input,
 @wrap_name_default()
 @layer_support()
-def cross_entropy(input, label, name=None, coeff=1.0, layer_attr=None):
+def cross_entropy(input,
+                  label,
+                  name=None,
+                  coeff=1.0,
+                  weight=None,
+                  layer_attr=None):
     """
     A loss layer for multi class entropy.
@@ -4961,22 +5002,27 @@ def cross_entropy(input, label, name=None, coeff=1.0, layer_attr=None):
     :type input: LayerOutput.
     :param name: The name of this layers. It is not necessary.
     :type name: None|basestring.
-    :param coeff: The coefficient affects the gradient in the backward.
+    :param coeff: The cost is multiplied with coeff.
+                  The coefficient affects the gradient in the backward.
     :type coeff: float.
+    :param weight: The cost of each sample is multiplied with each weight.
+                   The weight should be a layer with size=1. Note that gradient
+                   will not be calculated for weight.
+    :type weight: LayerOutput
     :param layer_attr: Extra Layer Attribute.
     :type layer_attr: ExtraLayerAttribute
     :return: LayerOutput object.
     :rtype: LayerOutput.
     """
+    ipts, parents = __cost_input__(input, label, weight)
+
     Layer(
         name=name,
         type=LayerType.CROSS_ENTROPY,
-        inputs=[input.name, label.name],
+        inputs=ipts,
         coeff=coeff,
         **ExtraLayerAttribute.to_kwargs(layer_attr))
-    return LayerOutput(
-        name, LayerType.CROSS_ENTROPY, parents=[input, label], size=1)
+    return LayerOutput(name, LayerType.CROSS_ENTROPY, parents=parents, size=1)


 @wrap_name_default()
...
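The weight argument added to cross_entropy() is routed through __cost_input__, so each sample's cost is scaled by a size-1 weight layer and, as the docstring above notes, no gradient is propagated into it. A minimal usage sketch with illustrative layer names and sizes:

    from paddle.trainer_config_helpers import *

    feature = data_layer(name="feature", size=100)
    label = data_layer(name="label", size=10)
    # One weight per sample; must be a layer of size 1.
    sample_weight = data_layer(name="sample_weight", size=1)

    prob = fc_layer(input=feature, size=10, act=SoftmaxActivation())
    cost = cross_entropy(input=prob, label=label, weight=sample_weight)
    outputs(cost)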
@@ -331,6 +331,54 @@ layers {
   }
   trans_type: "non-seq"
 }
+layers {
+  name: "__recurrent_group_3__"
+  type: "recurrent_layer_group"
+  active_type: ""
+}
+layers {
+  name: "seq_input@__recurrent_group_3__"
+  type: "scatter_agent"
+  size: 100
+  active_type: ""
+}
+layers {
+  name: "__memory_6__@__recurrent_group_3__"
+  type: "agent"
+  size: 200
+  active_type: ""
+}
+layers {
+  name: "__fc_layer_0__@__recurrent_group_3__"
+  type: "fc"
+  size: 200
+  active_type: "tanh"
+  inputs {
+    input_layer_name: "seq_input@__recurrent_group_3__"
+    input_parameter_name: "___fc_layer_0__@__recurrent_group_3__.w0"
+  }
+  inputs {
+    input_layer_name: "__memory_6__@__recurrent_group_3__"
+    input_parameter_name: "___fc_layer_0__@__recurrent_group_3__.w1"
+  }
+  bias_parameter_name: "___fc_layer_0__@__recurrent_group_3__.wbias"
+}
+layers {
+  name: "__fc_layer_0__"
+  type: "gather_agent"
+  size: 200
+  active_type: ""
+}
+layers {
+  name: "__last_seq_4__"
+  type: "seqlastins"
+  size: 200
+  active_type: "linear"
+  inputs {
+    input_layer_name: "__fc_layer_0__"
+  }
+  trans_type: "non-seq"
+}
 parameters {
   name: "___mixed_0__.w0"
   size: 40000
@@ -481,6 +529,36 @@ parameters {
   initial_strategy: 0
   initial_smart: false
 }
+parameters {
+  name: "___fc_layer_0__@__recurrent_group_3__.w0"
+  size: 20000
+  initial_mean: 0.0
+  initial_std: 0.1
+  dims: 100
+  dims: 200
+  initial_strategy: 0
+  initial_smart: true
+}
+parameters {
+  name: "___fc_layer_0__@__recurrent_group_3__.w1"
+  size: 40000
+  initial_mean: 0.0
+  initial_std: 0.0707106781187
+  dims: 200
+  dims: 200
+  initial_strategy: 0
+  initial_smart: true
+}
+parameters {
+  name: "___fc_layer_0__@__recurrent_group_3__.wbias"
+  size: 200
+  initial_mean: 0.0
+  initial_std: 0.0
+  dims: 1
+  dims: 200
+  initial_strategy: 0
+  initial_smart: false
+}
 input_layer_names: "seq_input"
 input_layer_names: "sub_seq_input"
 output_layer_names: "__last_seq_0__"
@@ -488,6 +566,7 @@ output_layer_names: "__first_seq_0__"
 output_layer_names: "__last_seq_1__"
 output_layer_names: "__last_seq_2__"
 output_layer_names: "__last_seq_3__"
+output_layer_names: "__last_seq_4__"
 sub_models {
   name: "root"
   layer_names: "seq_input"
@@ -510,6 +589,9 @@ sub_models {
   layer_names: "__gru_group_0___recurrent_group"
   layer_names: "__gru_group_0__"
   layer_names: "__last_seq_3__"
+  layer_names: "__recurrent_group_3__"
+  layer_names: "__fc_layer_0__"
+  layer_names: "__last_seq_4__"
   input_layer_names: "seq_input"
   input_layer_names: "sub_seq_input"
   output_layer_names: "__last_seq_0__"
@@ -517,6 +599,7 @@
   output_layer_names: "__last_seq_1__"
   output_layer_names: "__last_seq_2__"
   output_layer_names: "__last_seq_3__"
+  output_layer_names: "__last_seq_4__"
   is_recurrent_layer_group: false
 }
 sub_models {
@@ -647,4 +730,28 @@ sub_models {
   }
   target_inlinkid: -1
 }
+sub_models {
+  name: "__recurrent_group_3__"
+  layer_names: "seq_input@__recurrent_group_3__"
+  layer_names: "__memory_6__@__recurrent_group_3__"
+  layer_names: "__fc_layer_0__@__recurrent_group_3__"
+  is_recurrent_layer_group: true
+  reversed: false
+  memories {
+    layer_name: "__fc_layer_0__@__recurrent_group_3__"
+    link_name: "__memory_6__@__recurrent_group_3__"
+    is_sequence: false
+  }
+  in_links {
+    layer_name: "seq_input"
+    link_name: "seq_input@__recurrent_group_3__"
+    has_subseq: false
+  }
+  out_links {
+    layer_name: "__fc_layer_0__@__recurrent_group_3__"
+    link_name: "__fc_layer_0__"
+    has_subseq: false
+  }
+  target_inlinkid: -1
+}
@@ -16,6 +16,16 @@ def generate_rnn_simple(name):
     return rnn_simple


+def generate_rnn_simple_no_name():
+    def rnn_simple(s):
+        m = memory(name=None, size=200)
+        fc = fc_layer(input=[s, m], size=200)
+        m.set_input(fc)
+        return fc
+
+    return rnn_simple
+
+
 with mixed_layer() as lstm_param:  # test lstm unit, rnn group
     lstm_param += full_matrix_projection(input=seq, size=100 * 4)
@@ -33,4 +43,6 @@ outputs(
     last_seq(input=lstmemory_group(
         input=lstm_param, size=100)),
     last_seq(input=gru_group(
-        input=gru_param, size=100)))
+        input=gru_param, size=100)),
+    last_seq(input=recurrent_group(
+        step=generate_rnn_simple_no_name(), input=seq)), )