Commit 24b00ac6 authored by Yu Yang, committed by GitHub

Merge pull request #1313 from emailweixu/memory.set_input

Make it possible to postpone setting the layer name for a memory.
......@@ -2222,7 +2222,10 @@ def Link(
# memory for recurrent layer group.
# *name* and *size* are actual layer's name and size.
# will return name of the memory,
# If *name* is None, *memory_name* must be provided, and SetMemoryInput()
# must be called later to specify the layer which this memory remembers.
#
# Returns the name of the memory.
# Use this name when assigning the memory as another layer's input.
#
# boot frame of memory is zeroed by default,
......@@ -2234,15 +2237,18 @@ def Link(
# can only be initialized by a *boot_layer* which is a sequence.
#
@config_func
def Memory(
name,
size,
is_sequence=False,
boot_layer=None,
boot_bias=False,
boot_bias_active_type="",
boot_with_const_id=None, ):
agent_name = name + "+delay1"
def Memory(name,
size,
is_sequence=False,
boot_layer=None,
boot_bias=False,
boot_bias_active_type="",
boot_with_const_id=None,
memory_name=None):
if not memory_name:
config_assert(name is not None, "name cannot be None")
memory_name = name + "+delay1"
agent_name = memory_name
if is_sequence:
agent_layer = SequenceAgentLayer(agent_name, size)
else:
......@@ -2250,7 +2256,8 @@ def Memory(
config_assert(g_current_submodel.is_recurrent_layer_group,
'Memory should be used in recurrent layer group only')
memory = g_current_submodel.memories.add()
memory.layer_name = MakeLayerNameInSubmodel(name)
if name is not None:
memory.layer_name = MakeLayerNameInSubmodel(name)
memory.link_name = MakeLayerNameInSubmodel(agent_name)
memory.is_sequence = is_sequence
options = sum((boot_layer is not None, bool(boot_bias),
......@@ -2274,6 +2281,17 @@ def Memory(
return agent_name
@config_func
def SetMemoryInput(memory_name, layer_name):
memory_name = MakeLayerNameInSubmodel(memory_name)
layer_name = MakeLayerNameInSubmodel(layer_name)
for mem in g_current_submodel.memories:
if mem.link_name == memory_name:
mem.layer_name = layer_name
return
logger.fatal("Nonexistent memory name: " + memory_name)
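Taken together, Memory() and SetMemoryInput() let a memory be declared before the layer it remembers exists. A minimal sketch of the intended call order (the layer name "rnn_state" is illustrative, and these @config_func helpers are only valid inside a recurrent layer group during config parsing):

# Declare the memory first; the remembered layer is unknown, so pass
# *memory_name* instead of *name*.
mem = Memory(None, 200, memory_name="rnn_state+delay1")
# ... define a layer named "rnn_state" that takes *mem* as an input ...
# Bind the remembered layer to the memory afterwards.
SetMemoryInput("rnn_state+delay1", "rnn_state")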
# Generator for recurrent layer group, to use it:
# 1. define a id layer as output of layer group
# 2. define a memory of this id layer, and assign a boot id(begin of sequence)
......
......@@ -97,13 +97,13 @@ def reset_hook():
register_parse_config_hook(reset_hook)
def wrap_name_default(name_prefix=None):
def wrap_name_default(name_prefix=None, name_param="name"):
"""
Decorator to set the default of the "name" argument to "{name_prefix}_{invoke_count}".
.. code:: python
@default_name("some_name")
@wrap_name_default("some_name")
def func(name=None):
print name # name will never be None. If name is not set,
# name will be "some_name_%d"
......@@ -115,7 +115,7 @@ def wrap_name_default(name_prefix=None):
"""
factory = DefaultNameFactory(name_prefix)
_name_factories.append(factory)
return wrap_param_default(["name"], factory)
return wrap_param_default([name_param], factory)
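For reference, the new *name_param* argument lets the same default-name machinery fill a parameter other than "name". A hedged sketch with a hypothetical function make_memory (generated values follow the "__{prefix}_{N}__" pattern visible in the test output below):

@wrap_name_default(name_prefix="memory", name_param="memory_name")
def make_memory(name=None, memory_name=None):
    # memory_name is auto-filled ("__memory_0__", "__memory_1__", ...)
    # when the caller omits it; an explicit value passes through unchanged.
    return memory_name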
def wrap_param_attr_default(param_names=None, default_factory=None):
......
......@@ -288,6 +288,14 @@ class LayerOutput(object):
"""
assert False, "this method should not be invoked"
def set_input(self, input):
"""
Set the input for a memory layer. Can only be used on a memory layer.
"""
assert isinstance(input, LayerOutput)
assert self.layer_type == LayerType.MEMORY
SetMemoryInput(self.name, input.name)
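That is, LayerOutput.set_input is the user-facing wrapper over the config-level SetMemoryInput() above; the assertions restrict it to outputs of memory(). A short sketch of the call pattern inside a recurrent step function (sizes illustrative):

mem = memory(size=256)               # a LayerOutput with layer_type MEMORY
out = fc_layer(input=mem, size=256)
mem.set_input(out)                   # `out` is the layer the memory remembers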
ERROR_CLIPPING = 'error_clipping_threshold'
DROPOUT = 'drop_rate'
......@@ -2759,8 +2767,10 @@ def seq_concat_layer(a, b, act=None, name=None, layer_attr=None,
size=a.size)
@wrap_name_default("memory", "memory_name")
def memory(name,
size,
memory_name=None,
is_seq=False,
boot_layer=None,
boot_bias=None,
......@@ -2782,14 +2792,32 @@ def memory(name,
If boot_layer is not None, the memory is initialized with boot_layer's
output at the first time step.
Set :code:`is_seq` to true if the boot layer is a sequence.
The layer with the same name in the recurrent group updates the memory
at each time step.
:param name: memory's name.
.. code-block:: python
mem = memory(size=256, name='state')
state = fc_layer(input=mem, size=256, name='state')
If you do not want to specify the name, you can equivalently use set_input()
to specify the layer to be remembered, as follows:
.. code-block:: python
mem = memory(size=256)
state = fc_layer(input=mem, size=256)
mem.set_input(state)
:param name: the name of the layer which this memory remembers.
If name is None, the user should call set_input() later to
specify the layer which this memory remembers.
:type name: basestring
:param size: size of memory.
:type size: int
:param memory_name: the name of the memory.
It is ignored when name is provided.
:type memory_name: basestring
:param is_seq: whether the boot_layer is a sequence.
:type is_seq: bool
:param boot_layer: boot layer of memory.
......@@ -2811,13 +2839,21 @@ def memory(name,
boot_bias = ParamAttr.to_bias(boot_bias)
assert boot_layer is None or isinstance(boot_layer, LayerOutput)
if name is not None:
memory_name = None
agent_name = Memory(name, size, is_seq, boot_layer.name
if boot_layer is not None else None, boot_bias,
boot_bias_active_type.name, boot_with_const_id)
memory_name = Memory(
name,
size,
is_sequence=is_seq,
boot_layer=boot_layer.name if boot_layer is not None else None,
boot_bias=boot_bias,
boot_bias_active_type=boot_bias_active_type.name,
boot_with_const_id=boot_with_const_id,
memory_name=memory_name)
lout = LayerOutput(
name=agent_name,
name=memory_name,
size=size,
layer_type=LayerType.MEMORY,
parents=[boot_layer] if boot_layer is not None else None)
......@@ -3565,7 +3601,7 @@ def __cost_input__(input, label, weight=None):
ipts = [Input(input.name), Input(label.name)]
parents = [input, label]
if weight is not None:
assert weight.layer_type == LayerType.DATA
assert weight.size == 1
ipts.append(Input(weight.name))
parents.append(weight)
return ipts, parents
......@@ -4946,7 +4982,12 @@ def lambda_cost(input,
@wrap_name_default()
@layer_support()
def cross_entropy(input, label, name=None, coeff=1.0, layer_attr=None):
def cross_entropy(input,
label,
name=None,
coeff=1.0,
weight=None,
layer_attr=None):
"""
A loss layer for multi-class cross entropy.
......@@ -4961,22 +5002,27 @@ def cross_entropy(input, label, name=None, coeff=1.0, layer_attr=None):
:type input: LayerOutput.
:param name: The name of this layer. It is optional.
:type name: None|basestring.
:param coeff: The coefficient affects the gradient in the backward.
:param coeff: The cost is multiplied by coeff.
The coefficient also scales the gradient in the backward pass.
:type coeff: float.
:param weight: The cost of each sample is multiplied by its weight.
The weight should be a layer with size=1. Note that the gradient
will not be calculated for weight.
:type weight: LayerOutput
:param layer_attr: Extra Layer Attribute.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object.
:rtype: LayerOutput.
"""
ipts, parents = __cost_input__(input, label, weight)
Layer(
name=name,
type=LayerType.CROSS_ENTROPY,
inputs=[input.name, label.name],
inputs=ipts,
coeff=coeff,
**ExtraLayerAttribute.to_kwargs(layer_attr))
return LayerOutput(
name, LayerType.CROSS_ENTROPY, parents=[input, label], size=1)
return LayerOutput(name, LayerType.CROSS_ENTROPY, parents=parents, size=1)
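A hedged usage sketch of the new *weight* argument (layer names are illustrative, not from the patch). Since __cost_input__ asserts a DATA layer with size=1, per-sample weights enter the config as their own data layer:

feats = data_layer(name="features", size=128)
lbl = data_layer(name="label", size=10)
wgt = data_layer(name="sample_weight", size=1)  # one weight per sample
prob = fc_layer(input=feats, size=10, act=SoftmaxActivation())
cost = cross_entropy(input=prob, label=lbl, weight=wgt)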
@wrap_name_default()
......
......@@ -331,6 +331,54 @@ layers {
}
trans_type: "non-seq"
}
layers {
name: "__recurrent_group_3__"
type: "recurrent_layer_group"
active_type: ""
}
layers {
name: "seq_input@__recurrent_group_3__"
type: "scatter_agent"
size: 100
active_type: ""
}
layers {
name: "__memory_6__@__recurrent_group_3__"
type: "agent"
size: 200
active_type: ""
}
layers {
name: "__fc_layer_0__@__recurrent_group_3__"
type: "fc"
size: 200
active_type: "tanh"
inputs {
input_layer_name: "seq_input@__recurrent_group_3__"
input_parameter_name: "___fc_layer_0__@__recurrent_group_3__.w0"
}
inputs {
input_layer_name: "__memory_6__@__recurrent_group_3__"
input_parameter_name: "___fc_layer_0__@__recurrent_group_3__.w1"
}
bias_parameter_name: "___fc_layer_0__@__recurrent_group_3__.wbias"
}
layers {
name: "__fc_layer_0__"
type: "gather_agent"
size: 200
active_type: ""
}
layers {
name: "__last_seq_4__"
type: "seqlastins"
size: 200
active_type: "linear"
inputs {
input_layer_name: "__fc_layer_0__"
}
trans_type: "non-seq"
}
parameters {
name: "___mixed_0__.w0"
size: 40000
......@@ -481,6 +529,36 @@ parameters {
initial_strategy: 0
initial_smart: false
}
parameters {
name: "___fc_layer_0__@__recurrent_group_3__.w0"
size: 20000
initial_mean: 0.0
initial_std: 0.1
dims: 100
dims: 200
initial_strategy: 0
initial_smart: true
}
parameters {
name: "___fc_layer_0__@__recurrent_group_3__.w1"
size: 40000
initial_mean: 0.0
initial_std: 0.0707106781187
dims: 200
dims: 200
initial_strategy: 0
initial_smart: true
}
parameters {
name: "___fc_layer_0__@__recurrent_group_3__.wbias"
size: 200
initial_mean: 0.0
initial_std: 0.0
dims: 1
dims: 200
initial_strategy: 0
initial_smart: false
}
input_layer_names: "seq_input"
input_layer_names: "sub_seq_input"
output_layer_names: "__last_seq_0__"
......@@ -488,6 +566,7 @@ output_layer_names: "__first_seq_0__"
output_layer_names: "__last_seq_1__"
output_layer_names: "__last_seq_2__"
output_layer_names: "__last_seq_3__"
output_layer_names: "__last_seq_4__"
sub_models {
name: "root"
layer_names: "seq_input"
......@@ -510,6 +589,9 @@ sub_models {
layer_names: "__gru_group_0___recurrent_group"
layer_names: "__gru_group_0__"
layer_names: "__last_seq_3__"
layer_names: "__recurrent_group_3__"
layer_names: "__fc_layer_0__"
layer_names: "__last_seq_4__"
input_layer_names: "seq_input"
input_layer_names: "sub_seq_input"
output_layer_names: "__last_seq_0__"
......@@ -517,6 +599,7 @@ sub_models {
output_layer_names: "__last_seq_1__"
output_layer_names: "__last_seq_2__"
output_layer_names: "__last_seq_3__"
output_layer_names: "__last_seq_4__"
is_recurrent_layer_group: false
}
sub_models {
......@@ -647,4 +730,28 @@ sub_models {
}
target_inlinkid: -1
}
sub_models {
name: "__recurrent_group_3__"
layer_names: "seq_input@__recurrent_group_3__"
layer_names: "__memory_6__@__recurrent_group_3__"
layer_names: "__fc_layer_0__@__recurrent_group_3__"
is_recurrent_layer_group: true
reversed: false
memories {
layer_name: "__fc_layer_0__@__recurrent_group_3__"
link_name: "__memory_6__@__recurrent_group_3__"
is_sequence: false
}
in_links {
layer_name: "seq_input"
link_name: "seq_input@__recurrent_group_3__"
has_subseq: false
}
out_links {
layer_name: "__fc_layer_0__@__recurrent_group_3__"
link_name: "__fc_layer_0__"
has_subseq: false
}
target_inlinkid: -1
}
......@@ -16,6 +16,16 @@ def generate_rnn_simple(name):
return rnn_simple
def generate_rnn_simple_no_name():
def rnn_simple(s):
# a memory with a deferred name; the remembered layer is bound below via set_input()
m = memory(name=None, size=200)
fc = fc_layer(input=[s, m], size=200)
m.set_input(fc)
return fc
return rnn_simple
with mixed_layer() as lstm_param: # test lstm unit, rnn group
lstm_param += full_matrix_projection(input=seq, size=100 * 4)
......@@ -33,4 +43,6 @@ outputs(
last_seq(input=lstmemory_group(
input=lstm_param, size=100)),
last_seq(input=gru_group(
input=gru_param, size=100)))
input=gru_param, size=100)),
last_seq(input=recurrent_group(
step=generate_rnn_simple_no_name(), input=seq)), )