提交 027c5db3 编写于 作者: C Cao Ying 提交者: GitHub

Merge pull request #2384 from lcy-seso/fix_config_parsing_bug

Fix bugs in parsing the network topology for the text generation task.
......@@ -442,6 +442,14 @@ eos
.. autoclass:: paddle.v2.layer.eos
:noindex:
Miscs
=====
dropout
--------------
.. autoclass:: paddle.v2.layer.dropout
:noindex:
Activation with learnable parameter
===================================
......@@ -449,4 +457,3 @@ prelu
--------
.. autoclass:: paddle.v2.layer.prelu
:noindex:
......@@ -125,11 +125,3 @@ simple_attention
:members: simple_attention
:noindex:
Miscs
=====
dropout_layer
--------------
.. automodule:: paddle.v2.networks
:members: dropout_layer
:noindex:
......@@ -3546,11 +3546,7 @@ def update_g_config():
return g_config
def begin_parse(config_arg_str=''):
'''
@param config_arg_str: a string of the form var1=val1,var2=val2. It will be
passed to config script as a dictionary CONFIG_ARGS
'''
def begin_parse():
init_config_environment()
for hook in _parse_config_hooks:
hook()
......@@ -3568,8 +3564,12 @@ def begin_parse(config_arg_str=''):
def parse_config(trainer_config, config_arg_str):
begin_parse(config_arg_str)
'''
@param config_arg_str: a string of the form var1=val1,var2=val2. It will be
passed to config script as a dictionary CONFIG_ARGS
'''
begin_parse()
config_args = {}
if config_arg_str:
......
......@@ -121,6 +121,7 @@ __all__ = [
'smooth_l1_cost',
'layer_support',
'multiplex_layer',
'dropout_layer',
'prelu_layer',
]
......@@ -3771,7 +3772,6 @@ def beam_search(step,
assert generated_input_index != -1
gipt = input[generated_input_index]
assert isinstance(gipt, BaseGeneratedInput)
gipt.bos_id = bos_id
gipt.eos_id = eos_id
......@@ -3791,7 +3791,6 @@ def beam_search(step,
predict = gipt.after_real_step(step(*args))
eos_layer(input=predict, eos_id=eos_id, name=eos_name)
return predict
tmp = recurrent_group(
......@@ -5567,6 +5566,24 @@ def multiplex_layer(input, name=None, layer_attr=None):
size=l.config.size)
@wrap_name_default("dropout")
def dropout_layer(input, dropout_rate, name=None):
    """
    Build a dropout layer on top of ``input``.

    The layer is created through ``addto_layer`` with a linear activation
    and no bias; the dropout itself is requested by passing
    ``ExtraAttr(drop_rate=dropout_rate)`` as the layer attribute.

    :param name: Name given to the created layer. When left as ``None`` the
                 ``wrap_name_default("dropout")`` decorator presumably
                 supplies a generated name -- confirm against the decorator.
    :param input: Input forwarded unchanged to ``addto_layer``.
    :param dropout_rate: Value forwarded as ``drop_rate`` of the extra layer
                         attribute (presumably a probability in [0, 1] --
                         TODO confirm).
    :return: Whatever ``addto_layer`` returns for this configuration.
    """
    return addto_layer(
        name=name,
        input=input,
        act=LinearActivation(),
        bias_attr=False,
        layer_attr=ExtraAttr(drop_rate=dropout_rate))
@wrap_name_default()
@layer_support()
@wrap_name_default()
......
......@@ -26,10 +26,10 @@ from paddle.trainer.config_parser import *
__all__ = [
'sequence_conv_pool', 'simple_lstm', "simple_img_conv_pool",
"img_conv_bn_pool", 'dropout_layer', 'lstmemory_group', 'lstmemory_unit',
'small_vgg', 'img_conv_group', 'vgg_16_network', 'gru_unit', 'gru_group',
'simple_gru', 'simple_attention', 'simple_gru2', 'bidirectional_gru',
'text_conv_pool', 'bidirectional_lstm', 'inputs', 'outputs'
"img_conv_bn_pool", 'lstmemory_group', 'lstmemory_unit', 'small_vgg',
'img_conv_group', 'vgg_16_network', 'gru_unit', 'gru_group', 'simple_gru',
'simple_attention', 'simple_gru2', 'bidirectional_gru', 'text_conv_pool',
'bidirectional_lstm', 'inputs', 'outputs'
]
######################################################
......@@ -1366,29 +1366,6 @@ def simple_attention(encoded_sequence,
input=scaled, pooling_type=SumPooling(), name="%s_pooling" % name)
############################################################################
# Miscs #
############################################################################
@wrap_name_default("dropout")
def dropout_layer(input, dropout_rate, name=None):
    """
    Build a dropout layer on top of ``input``.

    The layer is created through ``addto_layer`` with a linear activation
    and no bias; the dropout itself is requested by passing
    ``ExtraAttr(drop_rate=dropout_rate)`` as the layer attribute.

    :param name: Name given to the created layer. When left as ``None`` the
                 ``wrap_name_default("dropout")`` decorator presumably
                 supplies a generated name -- confirm against the decorator.
    :param input: Input forwarded unchanged to ``addto_layer``.
    :param dropout_rate: Value forwarded as ``drop_rate`` of the extra layer
                         attribute (presumably a probability in [0, 1] --
                         TODO confirm).
    :return: Whatever ``addto_layer`` returns for this configuration.
    """
    return addto_layer(
        name=name,
        input=input,
        act=LinearActivation(),
        bias_attr=False,
        layer_attr=ExtraAttr(drop_rate=dropout_rate))
def inputs(layers, *args):
"""
Declare the inputs of the network. The order of the inputs should be the same as
......
......@@ -13,7 +13,7 @@
# limitations under the License.
"""
`paddle.v2.layer` is a part of model config packages in paddle.v2. In API v2,
we want to make Paddle a plain Python package. The model config package defined
we want to make Paddle a plain Python package. The model config package defines
how to configure a neural network topology in Paddle Python code.
The primary usage is shown below.
......@@ -30,7 +30,6 @@ The primary usage shows below.
# use prediction instance where needed.
parameters = paddle.parameters.create(cost)
"""
import collections
import copy
import re
......@@ -44,9 +43,10 @@ __all__ = ['data', 'parse_network']
def __need_to_keep__(name):
if name in ['StaticInput', 'LayerType', 'layer_support']:
return False
return True
return name in [
'StaticInput', 'SubsequenceInput', 'GeneratedInput', 'LayerType',
'layer_support'
]
def __need_to_wrap__(name):
......@@ -54,6 +54,8 @@ def __need_to_wrap__(name):
def __convert_name__(inname):
if __need_to_keep__(inname):
return inname
if inname == 'maxid_layer':
return 'max_id'
elif inname.endswith('memory') or inname.endswith(
......@@ -74,8 +76,6 @@ def __convert_name__(inname):
for name in v1_layers.__all__:
obj = getattr(v1_layers, name)
if not __need_to_keep__(name):
continue
new_name = __convert_name__(name)
if callable(obj) and __need_to_wrap__(name):
globals()[new_name] = __convert_to_v2__(obj, new_name, __name__)
......@@ -107,7 +107,7 @@ __data_layer__.__doc__ = __map_data_docstr__(v1_layers.data_layer.__doc__)
data = __convert_to_v2__(__data_layer__, 'name', __name__)
def __get_used_layers__(output_layers, extra_layers=None):
def __get_used_layers__(output_layers):
layer_names = set()
parents = {}
......@@ -132,6 +132,13 @@ def __get_used_layers__(output_layers, extra_layers=None):
add_parent(mem.layer_name, mem.boot_layer_name)
add_parent(mem.link_name, mem.layer_name)
if sub_model.HasField('generator'):
# according to the implementation of text generation
# in recurrent layer group, the generated word must be
# the first out link
add_parent(sub_model.out_links[0].layer_name,
sub_model.generator.eos_layer_name)
def dfs_travel(layer_name):
if layer_name in layer_names:
return
......@@ -247,8 +254,8 @@ def __trim_submodel__(old_submodel, layer_names, input_layer_names,
def parse_network(output_layers, extra_layers=None):
if not isinstance(output_layers, collections.Sequence):
output_layers = [output_layers]
if extra_layers is not None and not isinstance(extra_layers,
collections.Sequence):
if extra_layers is not None:
if not isinstance(extra_layers, collections.Sequence):
extra_layers = [extra_layers]
else:
extra_layers = []
......@@ -262,18 +269,29 @@ def parse_network(output_layers, extra_layers=None):
model_config = ModelConfig()
model_config.type = cp.g_config.model_config.type
for layer in output_layers:
model_config.output_layer_names.append(layer.full_name)
output_layer_names.add(layer.full_name)
for l in cp.g_config.model_config.layers:
if l.name not in layer_names:
continue
model_config.layers.extend([l])
if l.type == 'data':
if l.name in model_config.output_layer_names:
"""
In text generation, the outlink to save the generated word
indices is a data_layer defined in recurrent_group. This
data_layer is sure to be the output of the network in text
generation task, so this statement excludes such a special
data_layer from being inputs of the network, otherwise an error
will occur during data feeding.
"""
continue
model_config.input_layer_names.append(l.name)
input_layer_names.add(l.name)
for layer in output_layers:
model_config.output_layer_names.append(layer.full_name)
output_layer_names.add(layer.full_name)
for e in cp.g_config.model_config.evaluators:
if e.name in evaluator_names:
model_config.evaluators.extend([e])
......
......@@ -31,7 +31,6 @@ class Topology(object):
def __init__(self, layers, extra_layers=None):
def __check__(layers):
if not isinstance(layers, collections.Sequence):
__check_layer_type__(layers)
layers = [layers]
for layer in layers:
__check_layer_type__(layer)
......@@ -91,6 +90,7 @@ class Topology(object):
[('image', dense_vector(768)), ('label', integer_value(10))]
"""
data_layers = self.data_layers()
return [(nm, data_layers[nm].data_type)
for nm in self.proto().input_layer_names]
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册