diff --git a/doc/api/v2/config/layer.rst b/doc/api/v2/config/layer.rst
index 65d4c472c35f0fe726c32e4745b7352b2fc87d4d..59bd8b91255944a8ef702edd389214c17d0cb35d 100644
--- a/doc/api/v2/config/layer.rst
+++ b/doc/api/v2/config/layer.rst
@@ -442,6 +442,14 @@ eos
 .. autoclass:: paddle.v2.layer.eos
     :noindex:
 
+Miscs
+=====
+
+dropout
+--------------
+.. autoclass:: paddle.v2.layer.dropout
+    :noindex:
+
 Activation with learnable parameter
 ===================================
 
@@ -449,4 +457,3 @@ prelu
 --------
 .. autoclass:: paddle.v2.layer.prelu
     :noindex:
-
diff --git a/doc/api/v2/config/networks.rst b/doc/api/v2/config/networks.rst
index b2a617fff134035c04eeabbbaf6d9cbe2a525f1c..6e813ab1a820d068ea3e54cad6178f1cf928eadc 100644
--- a/doc/api/v2/config/networks.rst
+++ b/doc/api/v2/config/networks.rst
@@ -125,11 +125,3 @@ simple_attention
     :members: simple_attention
     :noindex:
 
-Miscs
-=====
-
-dropout_layer
---------------
-.. automodule:: paddle.v2.networks
-    :members: dropout_layer
-    :noindex:
diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index 3712135b69773deab90f22545d517255b769d5df..c5ec5349cf0c097d7bdbb50bd6d6d3568ff32400 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -3546,11 +3546,7 @@ def update_g_config():
     return g_config
 
 
-def begin_parse(config_arg_str=''):
-    '''
-    @param config_arg_str: a string of the form var1=val1,var2=val2. It will be
-    passed to config script as a dictionary CONFIG_ARGS
-    '''
+def begin_parse():
     init_config_environment()
     for hook in _parse_config_hooks:
         hook()
@@ -3568,8 +3564,12 @@
 
 
 def parse_config(trainer_config, config_arg_str):
-    begin_parse(config_arg_str)
+    '''
+    @param config_arg_str: a string of the form var1=val1,var2=val2. It will be
+    passed to the config script as a dictionary CONFIG_ARGS
+    '''
+    begin_parse()
 
     config_args = {}
 
     if config_arg_str:
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index 84933c59091e5e663ceda101c00522ce0f562b8e..5320f5c32ce00f4780cea16abaee718c95707467 100755
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -121,6 +121,7 @@ __all__ = [
     'smooth_l1_cost',
     'layer_support',
     'multiplex_layer',
+    'dropout_layer',
     'prelu_layer',
 ]
 
@@ -3771,7 +3772,6 @@ def beam_search(step,
 
     assert generated_input_index != -1
     gipt = input[generated_input_index]
-    assert isinstance(gipt, BaseGeneratedInput)
 
     gipt.bos_id = bos_id
     gipt.eos_id = eos_id
@@ -3791,7 +3791,6 @@ def beam_search(step,
         predict = gipt.after_real_step(step(*args))
 
         eos_layer(input=predict, eos_id=eos_id, name=eos_name)
-
         return predict
 
     tmp = recurrent_group(
@@ -5567,6 +5566,24 @@ def multiplex_layer(input, name=None, layer_attr=None):
         size=l.config.size)
 
 
+@wrap_name_default("dropout")
+def dropout_layer(input, dropout_rate, name=None):
+    """
+    Drop units of the input randomly with probability :code:`dropout_rate`.
+
+    :param name: The name of this layer. It is optional.
+    :param input: The input of this layer.
+    :param dropout_rate: The probability of dropping each input unit.
+    :return: LayerOutput object.
+    """
+    return addto_layer(
+        name=name,
+        input=input,
+        act=LinearActivation(),
+        bias_attr=False,
+        layer_attr=ExtraAttr(drop_rate=dropout_rate))
+
+
 @wrap_name_default()
 @layer_support()
 @wrap_name_default()
diff --git a/python/paddle/trainer_config_helpers/networks.py b/python/paddle/trainer_config_helpers/networks.py
index fb533a47e0b0585be6f0e019086993f8b3aa7f38..1bf59ed4840ae69afc5bce49c86a08b60e9603ee 100755
--- a/python/paddle/trainer_config_helpers/networks.py
+++ b/python/paddle/trainer_config_helpers/networks.py
@@ -26,10 +26,10 @@ from paddle.trainer.config_parser import *
 
 __all__ = [
     'sequence_conv_pool', 'simple_lstm', "simple_img_conv_pool",
-    "img_conv_bn_pool", 'dropout_layer', 'lstmemory_group', 'lstmemory_unit',
-    'small_vgg', 'img_conv_group', 'vgg_16_network', 'gru_unit', 'gru_group',
-    'simple_gru', 'simple_attention', 'simple_gru2', 'bidirectional_gru',
-    'text_conv_pool', 'bidirectional_lstm', 'inputs', 'outputs'
+    "img_conv_bn_pool", 'lstmemory_group', 'lstmemory_unit', 'small_vgg',
+    'img_conv_group', 'vgg_16_network', 'gru_unit', 'gru_group', 'simple_gru',
+    'simple_attention', 'simple_gru2', 'bidirectional_gru', 'text_conv_pool',
+    'bidirectional_lstm', 'inputs', 'outputs'
 ]
 
 ######################################################
@@ -1366,29 +1366,6 @@ def simple_attention(encoded_sequence,
         input=scaled, pooling_type=SumPooling(), name="%s_pooling" % name)
 
 
-############################################################################
-#                                Miscs                                     #
-############################################################################
-
-
-@wrap_name_default("dropout")
-def dropout_layer(input, dropout_rate, name=None):
-    """
-    @TODO(yuyang18): Add comments.
-
-    :param name:
-    :param input:
-    :param dropout_rate:
-    :return:
-    """
-    return addto_layer(
-        name=name,
-        input=input,
-        act=LinearActivation(),
-        bias_attr=False,
-        layer_attr=ExtraAttr(drop_rate=dropout_rate))
-
-
 def inputs(layers, *args):
     """
     Declare the inputs of network. The order of input should be as same as
diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py
index 815635f5dd4654fe3a31a9244e6e4473c397dd2f..aeed9ebd7d4d64efa5d0bf1638742a485c0fa44a 100644
--- a/python/paddle/v2/layer.py
+++ b/python/paddle/v2/layer.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 """
 `paddle.v2.layer` is a part of model config packages in paddle.v2. In API v2,
-we want to make Paddle a plain Python package. The model config package defined
+we want to make Paddle a plain Python package. The model config package defines
 the way how to configure a neural network topology in Paddle Python code.
 The primary usage shows below.
 
@@ -30,7 +30,6 @@ The primary usage shows below.
 
     # use prediction instance where needed.
     parameters = paddle.parameters.create(cost)
 """
-
 import collections
 import copy
 import re
@@ -44,9 +43,10 @@ __all__ = ['data', 'parse_network']
 
 
 def __need_to_keep__(name):
-    if name in ['StaticInput', 'LayerType', 'layer_support']:
-        return False
-    return True
+    return name in [
+        'StaticInput', 'SubsequenceInput', 'GeneratedInput', 'LayerType',
+        'layer_support'
+    ]
 
 
 def __need_to_wrap__(name):
@@ -54,6 +54,8 @@
 
 
 def __convert_name__(inname):
+    if __need_to_keep__(inname):
+        return inname
     if inname == 'maxid_layer':
         return 'max_id'
     elif inname.endswith('memory') or inname.endswith(
@@ -74,8 +76,6 @@
 
 for name in v1_layers.__all__:
     obj = getattr(v1_layers, name)
-    if not __need_to_keep__(name):
-        continue
     new_name = __convert_name__(name)
     if callable(obj) and __need_to_wrap__(name):
         globals()[new_name] = __convert_to_v2__(obj, new_name, __name__)
@@ -107,7 +107,7 @@ __data_layer__.__doc__ = __map_data_docstr__(v1_layers.data_layer.__doc__)
 data = __convert_to_v2__(__data_layer__, 'name', __name__)
 
 
-def __get_used_layers__(output_layers, extra_layers=None):
+def __get_used_layers__(output_layers):
     layer_names = set()
     parents = {}
 
@@ -132,6 +132,13 @@
             add_parent(mem.layer_name, mem.boot_layer_name)
             add_parent(mem.link_name, mem.layer_name)
 
+        if sub_model.HasField('generator'):
+            # According to the implementation of text generation in a
+            # recurrent layer group, the generated word must be the
+            # first out link.
+            add_parent(sub_model.out_links[0].layer_name,
+                       sub_model.generator.eos_layer_name)
+
     def dfs_travel(layer_name):
         if layer_name in layer_names:
             return
@@ -247,9 +254,9 @@ def __trim_submodel__(old_submodel, layer_names, input_layer_names,
 def parse_network(output_layers, extra_layers=None):
     if not isinstance(output_layers, collections.Sequence):
         output_layers = [output_layers]
-    if extra_layers is not None and not isinstance(extra_layers,
-                                                   collections.Sequence):
-        extra_layers = [extra_layers]
+    if extra_layers is not None:
+        if not isinstance(extra_layers, collections.Sequence):
+            extra_layers = [extra_layers]
     else:
         extra_layers = []
 
@@ -262,18 +269,29 @@
 
     model_config = ModelConfig()
     model_config.type = cp.g_config.model_config.type
+
+    for layer in output_layers:
+        model_config.output_layer_names.append(layer.full_name)
+        output_layer_names.add(layer.full_name)
+
     for l in cp.g_config.model_config.layers:
         if l.name not in layer_names:
             continue
         model_config.layers.extend([l])
        if l.type == 'data':
+            if l.name in model_config.output_layer_names:
+                """
+                In text generation, the outlink that stores the generated
+                word indices is a data_layer defined inside recurrent_group.
+                Such a data_layer is always an output of the network in a
+                generation task, so this statement excludes it from the
+                inputs of the network; otherwise an error would occur
+                when feeding data.
+                """
+                continue
             model_config.input_layer_names.append(l.name)
             input_layer_names.add(l.name)
 
-    for layer in output_layers:
-        model_config.output_layer_names.append(layer.full_name)
-        output_layer_names.add(layer.full_name)
-
     for e in cp.g_config.model_config.evaluators:
         if e.name in evaluator_names:
             model_config.evaluators.extend([e])
diff --git a/python/paddle/v2/topology.py b/python/paddle/v2/topology.py
index f3bb4d5f10dd6c5b220161e32dfc3a94642ac7a2..a20e878d0817d0a75e9c47a44f8765deca99225c 100644
--- a/python/paddle/v2/topology.py
+++ b/python/paddle/v2/topology.py
@@ -31,7 +31,6 @@ class Topology(object):
     def __init__(self, layers, extra_layers=None):
         def __check__(layers):
             if not isinstance(layers, collections.Sequence):
-                __check_layer_type__(layers)
                 layers = [layers]
             for layer in layers:
                 __check_layer_type__(layer)
@@ -91,6 +90,7 @@ class Topology(object):
         [('image', dense_vector(768)), ('label', integer_value(10))]
         """
         data_layers = self.data_layers()
+
         return [(nm, data_layers[nm].data_type)
                 for nm in self.proto().input_layer_names]
 
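
Note: a minimal usage sketch (not part of the patch) of the relocated dropout layer through the v2 API after this change; the toy topology, the layer sizes, and the 0.5 rate are illustrative assumptions rather than anything this diff prescribes.

    import paddle.v2 as paddle

    paddle.init(use_gpu=False, trainer_count=1)

    # Dropout is now exposed as paddle.v2.layer.dropout (previously
    # paddle.v2.networks.dropout_layer).
    img = paddle.layer.data(
        name='image', type=paddle.data_type.dense_vector(784))
    hidden = paddle.layer.fc(input=img, size=128)
    # Internally an addto_layer with LinearActivation, no bias, and
    # ExtraAttr(drop_rate=0.5), per dropout_layer above.
    dropped = paddle.layer.dropout(input=hidden, dropout_rate=0.5)
    predict = paddle.layer.fc(
        input=dropped, size=10, act=paddle.activation.Softmax())

    # parse_network serializes the topology; with this patch it records
    # the output layers before collecting the input layers.
    print(paddle.layer.parse_network(predict))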