diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index d403a6029a3e9d4c41b80a2206397dcdfe780026..575e1107413c22f1efe9c677093382a366fc3f67 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -2219,7 +2219,10 @@ def Link(
 
 # memory for recurrent layer group.
 # *name* and *size* are actual layer's name and size.
-# will return name of the memory,
+# If *name* is None, *memory_name* must be provided, and SetMemoryInput()
+# must be called later to specify the layer which this memory remembers.
+#
+# returns the name of the memory,
 # use this name if you assign the memory as other layer's input
 #
 # boot frame of memory is zeroed by default,
@@ -2231,15 +2234,18 @@
 # can only be initailized by a *boot_layer* which is a sequence.
 #
 @config_func
-def Memory(
-        name,
-        size,
-        is_sequence=False,
-        boot_layer=None,
-        boot_bias=False,
-        boot_bias_active_type="",
-        boot_with_const_id=None, ):
-    agent_name = name + "+delay1"
+def Memory(name,
+           size,
+           is_sequence=False,
+           boot_layer=None,
+           boot_bias=False,
+           boot_bias_active_type="",
+           boot_with_const_id=None,
+           memory_name=None):
+    if not memory_name:
+        config_assert(name is not None, "name cannot be None")
+        memory_name = name + "+delay1"
+    agent_name = memory_name
     if is_sequence:
         agent_layer = SequenceAgentLayer(agent_name, size)
     else:
@@ -2247,7 +2253,8 @@ def Memory(
     config_assert(g_current_submodel.is_recurrent_layer_group,
                   'Memory should be used in recurrent layer group only')
     memory = g_current_submodel.memories.add()
-    memory.layer_name = MakeLayerNameInSubmodel(name)
+    if name is not None:
+        memory.layer_name = MakeLayerNameInSubmodel(name)
     memory.link_name = MakeLayerNameInSubmodel(agent_name)
     memory.is_sequence = is_sequence
     options = sum((boot_layer is not None, bool(boot_bias),
@@ -2271,6 +2278,17 @@ def Memory(
     return agent_name
 
 
+@config_func
+def SetMemoryInput(memory_name, layer_name):
+    memory_name = MakeLayerNameInSubmodel(memory_name)
+    layer_name = MakeLayerNameInSubmodel(layer_name)
+    for mem in g_current_submodel.memories:
+        if mem.link_name == memory_name:
+            mem.layer_name = layer_name
+            return
+    logger.fatal("Nonexistent memory name: " + memory_name)
+
+
 # Generator for recurrent layer group, to use it:
 # 1. define a id layer as output of layer group
 # 2. define a memory of this id layer, and assign a boot id(begin of sequence)
diff --git a/python/paddle/trainer_config_helpers/default_decorators.py b/python/paddle/trainer_config_helpers/default_decorators.py
index ad3efcbf369411b9c42b2a32ed05b04f86bf7de6..b7463a022a146d749711a55b278354b4cd90e907 100644
--- a/python/paddle/trainer_config_helpers/default_decorators.py
+++ b/python/paddle/trainer_config_helpers/default_decorators.py
@@ -93,13 +93,13 @@ def reset_hook():
 register_parse_config_hook(reset_hook)
 
 
-def wrap_name_default(name_prefix=None):
+def wrap_name_default(name_prefix=None, name_param="name"):
     """
     Decorator to set "name" arguments default to "{name_prefix}_{invoke_count}".
 
     .. code:: python
 
-        @default_name("some_name")
+        @wrap_name_default("some_name")
         def func(name=None):
            print name  # name will never be None. If name is not set,
                        # name will be "some_name_%d"
@@ -111,7 +111,7 @@ def wrap_name_default(name_prefix=None):
     """
     factory = DefaultNameFactory(name_prefix)
     _name_factories.append(factory)
-    return wrap_param_default(["name"], factory)
+    return wrap_param_default([name_param], factory)
 
 
 def wrap_param_attr_default(param_names=None, default_factory=None):
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index 1fdc4c462363712e8b5b4dee10d0aaa26f4deffa..4087f3051e20fc6cf49e6840be22183714e1f12f 100755
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -280,6 +280,14 @@ class LayerOutput(object):
         """
         assert False, "this method should not be invoked"
 
+    def set_input(self, input):
+        """
+        Set the input for a memory layer. Can only be used for a memory layer.
+        """
+        assert isinstance(input, LayerOutput)
+        assert self.layer_type == LayerType.MEMORY
+        SetMemoryInput(self.name, input.name)
+
 
 ERROR_CLIPPING = 'error_clipping_threshold'
 DROPOUT = 'drop_rate'
@@ -2570,8 +2578,10 @@ def concat_layer(input, act=None, name=None, layer_attr=None, bias_attr=None):
         size=sz)
 
 
+@wrap_name_default("memory", "memory_name")
 def memory(name,
            size,
+           memory_name=None,
            is_seq=False,
            boot_layer=None,
            boot_bias=None,
@@ -2593,14 +2603,32 @@ def memory(name,
     If boot_layer is not null, the memory is just the boot_layer's output.
     Set :code:`is_seq` is true boot layer is sequence.
 
-    The same name layer in recurrent group will set memory on each time step.
-    :param name: memory's name.
+    .. code-block:: python
+
+       mem = memory(size=256, name='state')
+       state = fc_layer(input=mem, size=256, name='state')
+
+    If you do not want to specify the name, you can equivalently use set_input()
+    to specify the layer to be remembered, as follows:
+
+    .. code-block:: python
+
+       mem = memory(size=256)
+       state = fc_layer(input=mem, size=256)
+       mem.set_input(state)
+
+    :param name: the name of the layer which this memory remembers.
+                 If name is None, the user should call set_input() to specify
+                 the name of the layer which this memory remembers.
     :type name: basestring
     :param size: size of memory.
     :type size: int
+    :param memory_name: the name of the memory.
+                        It is ignored when name is provided.
+    :type memory_name: basestring
     :param is_seq: is sequence for boot_layer
     :type is_seq: bool
     :param boot_layer: boot layer of memory.
@@ -2622,13 +2650,21 @@ def memory(name,
         boot_bias = ParamAttr.to_bias(boot_bias)
 
     assert boot_layer is None or isinstance(boot_layer, LayerOutput)
+    if name is not None:
+        memory_name = None
 
-    agent_name = Memory(name, size, is_seq, boot_layer.name
-                        if boot_layer is not None else None, boot_bias,
-                        boot_bias_active_type.name, boot_with_const_id)
+    memory_name = Memory(
+        name,
+        size,
+        is_sequence=is_seq,
+        boot_layer=boot_layer.name if boot_layer is not None else None,
+        boot_bias=boot_bias,
+        boot_bias_active_type=boot_bias_active_type.name,
+        boot_with_const_id=boot_with_const_id,
+        memory_name=memory_name)
 
     lout = LayerOutput(
-        name=agent_name,
+        name=memory_name,
         size=size,
         layer_type=LayerType.MEMORY,
         parents=[boot_layer] if boot_layer is not None else None)
@@ -2754,8 +2790,8 @@ def gru_step_layer(input,
     :param name:
     :param gate_act:
     :param bias_attr:
-    :param param_attr: the parameter_attribute for transforming the output_mem 
-                       from previous step. 
+    :param param_attr: the parameter_attribute for transforming the output_mem
+                       from previous step.
     :param layer_attr:
     :return: LayerOutput object.
     :rtype: LayerOutput
@@ -2766,10 +2802,10 @@ def gru_step_layer(input,
     Layer(
         name=name,
         type=LayerType.GRU_STEP_LAYER,
-        # The parameter here is for transforming the output_mem. The input has 
-        # already been transformed outside this module so it does not need 
-        # parameter associated with it. 
-        # The parameter here is instead grouped with input is due to 
+        # The parameter here is for transforming the output_mem. The input has
+        # already been transformed outside this module so it does not need
+        # parameter associated with it.
+        # The parameter here is instead grouped with input due to
         # backward model compatibility.
         inputs=[Input(input.name, **param_attr.attr), output_mem.name],
         bias=ParamAttr.to_bias(bias_attr),
@@ -3376,7 +3412,7 @@ def __cost_input__(input, label, weight=None):
     ipts = [Input(input.name), Input(label.name)]
     parents = [input, label]
     if weight is not None:
-        assert weight.layer_type == LayerType.DATA
+        assert weight.size == 1
         ipts.append(Input(weight.name))
         parents.append(weight)
     return ipts, parents
@@ -4740,7 +4776,12 @@ def lambda_cost(input,
 
 @wrap_name_default()
 @layer_support()
-def cross_entropy(input, label, name=None, coeff=1.0, layer_attr=None):
+def cross_entropy(input,
+                  label,
+                  name=None,
+                  coeff=1.0,
+                  weight=None,
+                  layer_attr=None):
     """
     A loss layer for multi class entropy.
 
@@ -4755,22 +4796,27 @@ def cross_entropy(input, label, name=None, coeff=1.0, layer_attr=None):
     :type input: LayerOutput.
     :param name: The name of this layers. It is not necessary.
     :type name: None|basestring.
-    :param coeff: The coefficient affects the gradient in the backward.
+    :param coeff: The cost is multiplied with coeff.
+                  The coefficient affects the gradient in backward propagation.
     :type coeff: float.
+    :param weight: The cost of each sample is multiplied with each weight.
+                   The weight should be a layer with size=1. Note that gradient
+                   will not be calculated for weight.
+    :type weight: LayerOutput
     :param layer_attr: Extra Layer Attribute.
    :type layer_attr: ExtraLayerAttribute
     :return: LayerOutput object.
     :rtype: LayerOutput.
""" + ipts, parents = __cost_input__(input, label, weight) Layer( name=name, type=LayerType.CROSS_ENTROPY, - inputs=[input.name, label.name], + inputs=ipts, coeff=coeff, **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput( - name, LayerType.CROSS_ENTROPY, parents=[input, label], size=1) + return LayerOutput(name, LayerType.CROSS_ENTROPY, parents=parents, size=1) @wrap_name_default() diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr index 3e9d28416ed5066461e960f0a9f085e057c28346..a0fb729e062bdf6fd7d2a7c2ae364d1a2b32811d 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr @@ -331,6 +331,54 @@ layers { } trans_type: "non-seq" } +layers { + name: "__recurrent_group_3__" + type: "recurrent_layer_group" + active_type: "" +} +layers { + name: "seq_input@__recurrent_group_3__" + type: "scatter_agent" + size: 100 + active_type: "" +} +layers { + name: "__memory_6__@__recurrent_group_3__" + type: "agent" + size: 200 + active_type: "" +} +layers { + name: "__fc_layer_0__@__recurrent_group_3__" + type: "fc" + size: 200 + active_type: "tanh" + inputs { + input_layer_name: "seq_input@__recurrent_group_3__" + input_parameter_name: "___fc_layer_0__@__recurrent_group_3__.w0" + } + inputs { + input_layer_name: "__memory_6__@__recurrent_group_3__" + input_parameter_name: "___fc_layer_0__@__recurrent_group_3__.w1" + } + bias_parameter_name: "___fc_layer_0__@__recurrent_group_3__.wbias" +} +layers { + name: "__fc_layer_0__" + type: "gather_agent" + size: 200 + active_type: "" +} +layers { + name: "__last_seq_4__" + type: "seqlastins" + size: 200 + active_type: "linear" + inputs { + input_layer_name: "__fc_layer_0__" + } + trans_type: "non-seq" +} parameters { name: "___mixed_0__.w0" size: 40000 @@ -481,6 +529,36 @@ parameters { initial_strategy: 0 initial_smart: false } +parameters { + name: "___fc_layer_0__@__recurrent_group_3__.w0" + size: 20000 + initial_mean: 0.0 + initial_std: 0.1 + dims: 100 + dims: 200 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___fc_layer_0__@__recurrent_group_3__.w1" + size: 40000 + initial_mean: 0.0 + initial_std: 0.0707106781187 + dims: 200 + dims: 200 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___fc_layer_0__@__recurrent_group_3__.wbias" + size: 200 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 200 + initial_strategy: 0 + initial_smart: false +} input_layer_names: "seq_input" input_layer_names: "sub_seq_input" output_layer_names: "__last_seq_0__" @@ -488,6 +566,7 @@ output_layer_names: "__first_seq_0__" output_layer_names: "__last_seq_1__" output_layer_names: "__last_seq_2__" output_layer_names: "__last_seq_3__" +output_layer_names: "__last_seq_4__" sub_models { name: "root" layer_names: "seq_input" @@ -510,6 +589,9 @@ sub_models { layer_names: "__gru_group_0___recurrent_group" layer_names: "__gru_group_0__" layer_names: "__last_seq_3__" + layer_names: "__recurrent_group_3__" + layer_names: "__fc_layer_0__" + layer_names: "__last_seq_4__" input_layer_names: "seq_input" input_layer_names: "sub_seq_input" output_layer_names: "__last_seq_0__" @@ -517,6 +599,7 @@ sub_models { output_layer_names: "__last_seq_1__" output_layer_names: "__last_seq_2__" output_layer_names: "__last_seq_3__" + output_layer_names: "__last_seq_4__" 
   is_recurrent_layer_group: false
 }
 sub_models {
@@ -647,4 +730,28 @@ sub_models {
   }
   target_inlinkid: -1
 }
+sub_models {
+  name: "__recurrent_group_3__"
+  layer_names: "seq_input@__recurrent_group_3__"
+  layer_names: "__memory_6__@__recurrent_group_3__"
+  layer_names: "__fc_layer_0__@__recurrent_group_3__"
+  is_recurrent_layer_group: true
+  reversed: false
+  memories {
+    layer_name: "__fc_layer_0__@__recurrent_group_3__"
+    link_name: "__memory_6__@__recurrent_group_3__"
+    is_sequence: false
+  }
+  in_links {
+    layer_name: "seq_input"
+    link_name: "seq_input@__recurrent_group_3__"
+    has_subseq: false
+  }
+  out_links {
+    layer_name: "__fc_layer_0__@__recurrent_group_3__"
+    link_name: "__fc_layer_0__"
+    has_subseq: false
+  }
+  target_inlinkid: -1
+}
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_rnn_group.py b/python/paddle/trainer_config_helpers/tests/configs/test_rnn_group.py
index 60b4849d69d497109ef5af3257e212df233a2d0b..91010759e4847f087eb4e05ad98ae794a2129365 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_rnn_group.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_rnn_group.py
@@ -16,6 +16,16 @@ def generate_rnn_simple(name):
     return rnn_simple
 
 
+def generate_rnn_simple_no_name():
+    def rnn_simple(s):
+        m = memory(name=None, size=200)
+        fc = fc_layer(input=[s, m], size=200)
+        m.set_input(fc)
+        return fc
+
+    return rnn_simple
+
+
 with mixed_layer() as lstm_param:  # test lstm unit, rnn group
     lstm_param += full_matrix_projection(input=seq, size=100 * 4)
 
@@ -33,4 +43,6 @@ outputs(
     last_seq(input=lstmemory_group(
         input=lstm_param, size=100)),
     last_seq(input=gru_group(
-        input=gru_param, size=100)))
+        input=gru_param, size=100)),
+    last_seq(input=recurrent_group(
+        step=generate_rnn_simple_no_name(), input=seq)), )
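
Usage note (a sketch, not part of the patch): the test above exercises the new no-name memory bound via set_input() inside recurrent_group(). A minimal self-contained config along the same lines is given below; the data_layer name and its size of 100 are illustrative assumptions mirroring generate_rnn_simple_no_name().

from paddle.trainer_config_helpers import *

# Input sequence; name and size are assumptions matching the test's seq_input.
seq = data_layer(name='seq_input', size=100)


def step(s):
    # Create a memory without a name; which layer it remembers is bound later.
    m = memory(name=None, size=200)
    fc = fc_layer(input=[s, m], size=200)
    # Bind the memory to the layer it should remember (the fc output).
    m.set_input(fc)
    return fc


outputs(last_seq(input=recurrent_group(step=step, input=seq)))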
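Similarly, a sketch of the weighted cross_entropy() introduced in this patch; the layer names, sizes, and softmax activation here are assumptions for illustration. The per-sample weight must come from a layer of size 1 (per the new assert in __cost_input__), and no gradient is propagated to it.

from paddle.trainer_config_helpers import *

feature = data_layer(name='feature', size=128)
label = data_layer(name='label', size=10)
# Per-sample cost weight; must be a layer with size=1.
weight = data_layer(name='weight', size=1)

prob = fc_layer(input=feature, size=10, act=SoftmaxActivation())
# The cost of each sample is scaled by its weight (and by coeff).
cost = cross_entropy(input=prob, label=label, weight=weight, coeff=1.0)
outputs(cost)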