提交 7d551dd4 编写于 作者: X xuwei06

Make it possible to postpone setting the layer name for a memory.

The reason for adding the function is exemplified in the following hypothetical code:

mem = memory(name=None, size=256)
hidden = fc_layer(input=mem)
state = hidden + x
mem.set_input(state)

In the above code segment, it would be very annoying if we require the user to provide
the name at memory() call because the layer name of state is automatically generated
and is not easy to set it.

Change-Id: I918bf1d3d5c26addd88a6f7021e98b3e0e9df494
上级 a30f6aa9
......@@ -2219,7 +2219,10 @@ def Link(
# memory for recurrent layer group.
# *name* and *size* are actual layer's name and size.
# will return name of the memory,
# If *name* is None, need to provide *memory_name* and need to use
# SetMemoryInput() later to specify the layer which this memory remembers.
#
# return the name of the memory,
# use this name if you assign the memory as other layer's input
#
# boot frame of memory is zeroed by default,
......@@ -2231,15 +2234,18 @@ def Link(
# can only be initailized by a *boot_layer* which is a sequence.
#
@config_func
def Memory(
name,
size,
is_sequence=False,
boot_layer=None,
boot_bias=False,
boot_bias_active_type="",
boot_with_const_id=None, ):
agent_name = name + "+delay1"
def Memory(name,
size,
is_sequence=False,
boot_layer=None,
boot_bias=False,
boot_bias_active_type="",
boot_with_const_id=None,
memory_name=None):
if not memory_name:
config_assert(name is not None, "name needs cannot be None")
memory_name = name + "+delay1"
agent_name = memory_name
if is_sequence:
agent_layer = SequenceAgentLayer(agent_name, size)
else:
......@@ -2247,7 +2253,8 @@ def Memory(
config_assert(g_current_submodel.is_recurrent_layer_group,
'Memory should be used in recurrent layer group only')
memory = g_current_submodel.memories.add()
memory.layer_name = MakeLayerNameInSubmodel(name)
if name is not None:
memory.layer_name = MakeLayerNameInSubmodel(name)
memory.link_name = MakeLayerNameInSubmodel(agent_name)
memory.is_sequence = is_sequence
options = sum((boot_layer is not None, bool(boot_bias),
......@@ -2271,6 +2278,17 @@ def Memory(
return agent_name
@config_func
def SetMemoryInput(memory_name, layer_name):
memory_name = MakeLayerNameInSubmodel(memory_name)
layer_name = MakeLayerNameInSubmodel(layer_name)
for mem in g_current_submodel.memories:
if mem.link_name == memory_name:
mem.layer_name = layer_name
return
logger.fatal("Nonexistent memory name: " + memory_name)
# Generator for recurrent layer group, to use it:
# 1. define a id layer as output of layer group
# 2. define a memory of this id layer, and assign a boot id(begin of sequence)
......
......@@ -93,13 +93,13 @@ def reset_hook():
register_parse_config_hook(reset_hook)
def wrap_name_default(name_prefix=None):
def wrap_name_default(name_prefix=None, name_param="name"):
"""
Decorator to set "name" arguments default to "{name_prefix}_{invoke_count}".
.. code:: python
@default_name("some_name")
@wrap_name_default("some_name")
def func(name=None):
print name # name will never be None. If name is not set,
# name will be "some_name_%d"
......@@ -111,7 +111,7 @@ def wrap_name_default(name_prefix=None):
"""
factory = DefaultNameFactory(name_prefix)
_name_factories.append(factory)
return wrap_param_default(["name"], factory)
return wrap_param_default([name_param], factory)
def wrap_param_attr_default(param_names=None, default_factory=None):
......
......@@ -280,6 +280,14 @@ class LayerOutput(object):
"""
assert False, "this method should not be invoked"
def set_input(self, input):
"""
Set the input for a memory layer. Can only be used for memory layer
"""
assert isinstance(input, LayerOutput)
assert self.layer_type == LayerType.MEMORY
SetMemoryInput(self.name, input.name)
ERROR_CLIPPING = 'error_clipping_threshold'
DROPOUT = 'drop_rate'
......@@ -2570,8 +2578,10 @@ def concat_layer(input, act=None, name=None, layer_attr=None, bias_attr=None):
size=sz)
@wrap_name_default("memory", "memory_name")
def memory(name,
size,
memory_name=None,
is_seq=False,
boot_layer=None,
boot_bias=None,
......@@ -2593,14 +2603,32 @@ def memory(name,
If boot_layer is not null, the memory is just the boot_layer's output.
Set :code:`is_seq` is true boot layer is sequence.
The same name layer in recurrent group will set memory on each time
step.
:param name: memory's name.
.. code-block:: python
mem = memory(size=256, name='state')
state = fc_layer(input=mem, size=256, name='state')
If you do not want to specify the name, you can equivalently use set_input()
to specify the layer needs to be remembered as the following:
.. code-block:: python
mem = memory(size=256)
state = fc_layer(input=mem, size=256)
mem.set_input(mem)
:param name: the name of the layer which this memory remembers.
If name is None, user should call set_input() to specify the
name of the layer which this memory remembers.
:type name: basestring
:param size: size of memory.
:type size: int
:param memory_name: the name of the memory.
It is ignored when name is provided.
:type memory_name: basestring
:param is_seq: is sequence for boot_layer
:type is_seq: bool
:param boot_layer: boot layer of memory.
......@@ -2622,13 +2650,21 @@ def memory(name,
boot_bias = ParamAttr.to_bias(boot_bias)
assert boot_layer is None or isinstance(boot_layer, LayerOutput)
if name is not None:
memory_name = None
agent_name = Memory(name, size, is_seq, boot_layer.name
if boot_layer is not None else None, boot_bias,
boot_bias_active_type.name, boot_with_const_id)
memory_name = Memory(
name,
size,
is_sequence=is_seq,
boot_layer=boot_layer.name if boot_layer is not None else None,
boot_bias=boot_bias,
boot_bias_active_type=boot_bias_active_type.name,
boot_with_const_id=boot_with_const_id,
memory_name=memory_name)
lout = LayerOutput(
name=agent_name,
name=memory_name,
size=size,
layer_type=LayerType.MEMORY,
parents=[boot_layer] if boot_layer is not None else None)
......@@ -2754,8 +2790,8 @@ def gru_step_layer(input,
:param name:
:param gate_act:
:param bias_attr:
:param param_attr: the parameter_attribute for transforming the output_mem
from previous step.
:param param_attr: the parameter_attribute for transforming the output_mem
from previous step.
:param layer_attr:
:return: LayerOutput object.
:rtype: LayerOutput
......@@ -2766,10 +2802,10 @@ def gru_step_layer(input,
Layer(
name=name,
type=LayerType.GRU_STEP_LAYER,
# The parameter here is for transforming the output_mem. The input has
# already been transformed outside this module so it does not need
# parameter associated with it.
# The parameter here is instead grouped with input is due to
# The parameter here is for transforming the output_mem. The input has
# already been transformed outside this module so it does not need
# parameter associated with it.
# The parameter here is instead grouped with input is due to
# backward model compatibility.
inputs=[Input(input.name, **param_attr.attr), output_mem.name],
bias=ParamAttr.to_bias(bias_attr),
......@@ -3376,7 +3412,7 @@ def __cost_input__(input, label, weight=None):
ipts = [Input(input.name), Input(label.name)]
parents = [input, label]
if weight is not None:
assert weight.layer_type == LayerType.DATA
assert weight.size == 1
ipts.append(Input(weight.name))
parents.append(weight)
return ipts, parents
......@@ -4740,7 +4776,12 @@ def lambda_cost(input,
@wrap_name_default()
@layer_support()
def cross_entropy(input, label, name=None, coeff=1.0, layer_attr=None):
def cross_entropy(input,
label,
name=None,
coeff=1.0,
weight=None,
layer_attr=None):
"""
A loss layer for multi class entropy.
......@@ -4755,22 +4796,27 @@ def cross_entropy(input, label, name=None, coeff=1.0, layer_attr=None):
:type input: LayerOutput.
:param name: The name of this layers. It is not necessary.
:type name: None|basestring.
:param coeff: The coefficient affects the gradient in the backward.
:param coeff: The cost is multiplied with coeff.
The coefficient affects the gradient in the backward.
:type coeff: float.
:param weight: The cost of each sample is multiplied with each weight.
The weight should be a layer with size=1. Note that gradient
will not be calculated for weight.
:type weight: LayerOutout
:param layer_attr: Extra Layer Attribute.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object.
:rtype: LayerOutput.
"""
ipts, parents = __cost_input__(input, label, weight)
Layer(
name=name,
type=LayerType.CROSS_ENTROPY,
inputs=[input.name, label.name],
inputs=ipts,
coeff=coeff,
**ExtraLayerAttribute.to_kwargs(layer_attr))
return LayerOutput(
name, LayerType.CROSS_ENTROPY, parents=[input, label], size=1)
return LayerOutput(name, LayerType.CROSS_ENTROPY, parents=parents, size=1)
@wrap_name_default()
......
......@@ -331,6 +331,54 @@ layers {
}
trans_type: "non-seq"
}
layers {
name: "__recurrent_group_3__"
type: "recurrent_layer_group"
active_type: ""
}
layers {
name: "seq_input@__recurrent_group_3__"
type: "scatter_agent"
size: 100
active_type: ""
}
layers {
name: "__memory_6__@__recurrent_group_3__"
type: "agent"
size: 200
active_type: ""
}
layers {
name: "__fc_layer_0__@__recurrent_group_3__"
type: "fc"
size: 200
active_type: "tanh"
inputs {
input_layer_name: "seq_input@__recurrent_group_3__"
input_parameter_name: "___fc_layer_0__@__recurrent_group_3__.w0"
}
inputs {
input_layer_name: "__memory_6__@__recurrent_group_3__"
input_parameter_name: "___fc_layer_0__@__recurrent_group_3__.w1"
}
bias_parameter_name: "___fc_layer_0__@__recurrent_group_3__.wbias"
}
layers {
name: "__fc_layer_0__"
type: "gather_agent"
size: 200
active_type: ""
}
layers {
name: "__last_seq_4__"
type: "seqlastins"
size: 200
active_type: "linear"
inputs {
input_layer_name: "__fc_layer_0__"
}
trans_type: "non-seq"
}
parameters {
name: "___mixed_0__.w0"
size: 40000
......@@ -481,6 +529,36 @@ parameters {
initial_strategy: 0
initial_smart: false
}
parameters {
name: "___fc_layer_0__@__recurrent_group_3__.w0"
size: 20000
initial_mean: 0.0
initial_std: 0.1
dims: 100
dims: 200
initial_strategy: 0
initial_smart: true
}
parameters {
name: "___fc_layer_0__@__recurrent_group_3__.w1"
size: 40000
initial_mean: 0.0
initial_std: 0.0707106781187
dims: 200
dims: 200
initial_strategy: 0
initial_smart: true
}
parameters {
name: "___fc_layer_0__@__recurrent_group_3__.wbias"
size: 200
initial_mean: 0.0
initial_std: 0.0
dims: 1
dims: 200
initial_strategy: 0
initial_smart: false
}
input_layer_names: "seq_input"
input_layer_names: "sub_seq_input"
output_layer_names: "__last_seq_0__"
......@@ -488,6 +566,7 @@ output_layer_names: "__first_seq_0__"
output_layer_names: "__last_seq_1__"
output_layer_names: "__last_seq_2__"
output_layer_names: "__last_seq_3__"
output_layer_names: "__last_seq_4__"
sub_models {
name: "root"
layer_names: "seq_input"
......@@ -510,6 +589,9 @@ sub_models {
layer_names: "__gru_group_0___recurrent_group"
layer_names: "__gru_group_0__"
layer_names: "__last_seq_3__"
layer_names: "__recurrent_group_3__"
layer_names: "__fc_layer_0__"
layer_names: "__last_seq_4__"
input_layer_names: "seq_input"
input_layer_names: "sub_seq_input"
output_layer_names: "__last_seq_0__"
......@@ -517,6 +599,7 @@ sub_models {
output_layer_names: "__last_seq_1__"
output_layer_names: "__last_seq_2__"
output_layer_names: "__last_seq_3__"
output_layer_names: "__last_seq_4__"
is_recurrent_layer_group: false
}
sub_models {
......@@ -647,4 +730,28 @@ sub_models {
}
target_inlinkid: -1
}
sub_models {
name: "__recurrent_group_3__"
layer_names: "seq_input@__recurrent_group_3__"
layer_names: "__memory_6__@__recurrent_group_3__"
layer_names: "__fc_layer_0__@__recurrent_group_3__"
is_recurrent_layer_group: true
reversed: false
memories {
layer_name: "__fc_layer_0__@__recurrent_group_3__"
link_name: "__memory_6__@__recurrent_group_3__"
is_sequence: false
}
in_links {
layer_name: "seq_input"
link_name: "seq_input@__recurrent_group_3__"
has_subseq: false
}
out_links {
layer_name: "__fc_layer_0__@__recurrent_group_3__"
link_name: "__fc_layer_0__"
has_subseq: false
}
target_inlinkid: -1
}
......@@ -16,6 +16,16 @@ def generate_rnn_simple(name):
return rnn_simple
def generate_rnn_simple_no_name():
def rnn_simple(s):
m = memory(name=None, size=200)
fc = fc_layer(input=[s, m], size=200)
m.set_input(fc)
return fc
return rnn_simple
with mixed_layer() as lstm_param: # test lstm unit, rnn group
lstm_param += full_matrix_projection(input=seq, size=100 * 4)
......@@ -33,4 +43,6 @@ outputs(
last_seq(input=lstmemory_group(
input=lstm_param, size=100)),
last_seq(input=gru_group(
input=gru_param, size=100)))
input=gru_param, size=100)),
last_seq(input=recurrent_group(
step=generate_rnn_simple_no_name(), input=seq)), )
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册