Commit ea8013e4, authored by qiaolongfei

can run test.py for generating

Parent f7be384e
 import sys
 import paddle.v2 as paddle
+import paddle.v2.layer.beam_search as beam_search


-def seqToseq_net(source_dict_dim, target_dict_dim):
+def seqToseq_net(source_dict_dim, target_dict_dim, is_generating):
     ### Network Architecture
     word_vector_dim = 512  # dimension of word vector
     decoder_size = 512  # dimension of hidden unit in GRU Decoder network
     encoder_size = 512  # dimension of hidden unit in GRU Encoder network
+    beam_size = 3
+    max_length = 250

     #### Encoder
     src_word_id = paddle.layer.data(
         name='source_language_word',
@@ -67,30 +71,63 @@ def seqToseq_net(source_dict_dim, target_dict_dim):
     group_input2 = paddle.layer.StaticInputV2(input=encoded_proj, is_seq=True)
     group_inputs = [group_input1, group_input2]
-    trg_embedding = paddle.layer.embedding(
-        input=paddle.layer.data(
-            name='target_language_word',
-            type=paddle.data_type.integer_value_sequence(target_dict_dim)),
-        size=word_vector_dim,
-        param_attr=paddle.attr.ParamAttr(name='_target_language_embedding'))
-    group_inputs.append(trg_embedding)
-
-    # For a decoder equipped with an attention mechanism, in training,
-    # the target embedding (the ground truth) is the data input,
-    # while the encoded source sequence is accessed as an unbounded memory.
-    # Here, the StaticInput defines a read-only memory
-    # for the recurrent_group.
-    decoder = paddle.layer.recurrent_group(
-        name=decoder_group_name,
-        step=gru_decoder_with_attention,
-        input=group_inputs)
-
-    lbl = paddle.layer.data(
-        name='target_language_next_word',
-        type=paddle.data_type.integer_value_sequence(target_dict_dim))
-    cost = paddle.layer.classification_cost(input=decoder, label=lbl)
-
-    return cost
+    if not is_generating:
+        trg_embedding = paddle.layer.embedding(
+            input=paddle.layer.data(
+                name='target_language_word',
+                type=paddle.data_type.integer_value_sequence(target_dict_dim)),
+            size=word_vector_dim,
+            param_attr=paddle.attr.ParamAttr(name='_target_language_embedding'))
+        group_inputs.append(trg_embedding)
+
+        # For a decoder equipped with an attention mechanism, in training,
+        # the target embedding (the ground truth) is the data input,
+        # while the encoded source sequence is accessed as an unbounded memory.
+        # Here, the StaticInput defines a read-only memory
+        # for the recurrent_group.
+        decoder = paddle.layer.recurrent_group(
+            name=decoder_group_name,
+            step=gru_decoder_with_attention,
+            input=group_inputs)
+
+        lbl = paddle.layer.data(
+            name='target_language_next_word',
+            type=paddle.data_type.integer_value_sequence(target_dict_dim))
+        cost = paddle.layer.classification_cost(input=decoder, label=lbl)
+
+        return cost
+    else:
+        # In generation, the decoder predicts the next target word based on
+        # the encoded source sequence and the last generated target word.
+        # The encoded source sequence (the encoder's output) must be specified
+        # by StaticInput, which is a read-only memory.
+        # The embedding of the last generated word is automatically obtained
+        # by GeneratedInput, which is initialized by a start mark, such as <s>,
+        # and must be included in generation.
+        trg_embedding = beam_search.GeneratedInputV2(
+            size=target_dict_dim,
+            embedding_name='_target_language_embedding',
+            embedding_size=word_vector_dim)
+        group_inputs.append(trg_embedding)
+
+        beam_gen = beam_search.beam_search(
+            name=decoder_group_name,
+            step=gru_decoder_with_attention,
+            input=group_inputs,
+            bos_id=0,
+            eos_id=1,
+            beam_size=beam_size,
+            max_length=max_length)
+
+        # seqtext_printer_evaluator(
+        #     input=beam_gen,
+        #     id_input=data_layer(
+        #         name="sent_id", size=1),
+        #     dict_file=trg_dict_path,
+        #     result_file=gen_trans_file)
+
+        return beam_gen
 def main():
...
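A minimal usage sketch of the updated entry point (the dictionary sizes are hypothetical; the rest of the seqToseq demo setup — data readers, optimizer, trainer — is assumed to be unchanged):

    # training mode returns the classification cost to optimize
    cost = seqToseq_net(source_dict_dim=30000, target_dict_dim=30000,
                        is_generating=False)

    # generating mode returns the beam-search output layer
    beam_gen = seqToseq_net(source_dict_dim=30000, target_dict_dim=30000,
                            is_generating=True)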
@@ -81,8 +81,10 @@ def gru_encoder_decoder(data_conf,
     """
     for k, v in data_conf.iteritems():
         globals()[k] = v
-    source_dict_dim = len(open(src_dict_path, "r").readlines())
-    target_dict_dim = len(open(trg_dict_path, "r").readlines())
+    #source_dict_dim = len(open(src_dict_path, "r").readlines())
+    #target_dict_dim = len(open(trg_dict_path, "r").readlines())
+    source_dict_dim = 1000
+    target_dict_dim = 2000
     gen_trans_file = gen_result

     src_word_id = data_layer(name='source_language_word', size=source_dict_dim)
@@ -131,9 +133,8 @@ def gru_encoder_decoder(data_conf,
     decoder_group_name = "decoder_group"
     group_inputs = [
-        StaticInput(
-            input=encoded_vector, is_seq=True), StaticInput(
-            input=encoded_proj, is_seq=True)
+        StaticInput(input=encoded_vector, is_seq=True),
+        StaticInput(input=encoded_proj, is_seq=True)
     ]
     if not is_generating:
...
@@ -19,7 +19,8 @@ sys.path.append("..")
 from seqToseq_net import *

 # whether this config is used for generating
-is_generating = False
+#is_generating = False
+is_generating = True

 ### Data Definition
 data_dir = "./data/pre-wmt14"
...
@@ -76,6 +76,10 @@ class Layer(object):
         """
         function to set proto attribute
         """
+        print "======"
+        # print self.name
+        print self.__parent_layers__
+        # print self.__context__
         self.__context__ = context

         # short cut if myself is parsed before.
...
@@ -135,6 +135,10 @@ class WithExtraParent(Layer):
         """
         function to set proto attribute
         """
+        print "*************"
+        # print context
+        print self.name
+        print self.__extra_parent__
         kwargs = dict()
         for p in self.__extra_parent__:
             p.to_proto(context=context)
@@ -162,11 +166,12 @@ class WithExtraParent(Layer):
 class MemoryV2(WithExtraParent):
-    def __init__(self, name, **kwargs):
+    def __init__(self, name, extra_input=None, **kwargs):
         self.name = name
         super(MemoryV2, self).__init__(name=name, parent_layers=dict())
         self.__kwargs__ = kwargs
         self.__boot_layer_name__ = None
         if 'boot_layer' in kwargs:
             begin_of_current_rnn = []
             # TODO(yuyang18): Fix inspect, it could be wrong when user invoke a
...
@@ -223,22 +228,6 @@ class MemoryV2(WithExtraParent):
         return True


-class LayerOutputV2(Layer):
-    """
-    LayerOutputV2 is used to store the result of LayerOutput in v1 api.
-    It will not store it's parents because layer_output has been parsed already.
-    """
-
-    def __init__(self, layer_output):
-        assert isinstance(layer_output, conf_helps.LayerOutput)
-        self.layer_output = layer_output
-        super(LayerOutputV2, self).__init__(
-            name=layer_output.name, parent_layers=dict())
-
-    def to_proto_impl(self):
-        return self.layer_output
-
-
 class StaticInputV2(object):
     def __init__(self, input, is_seq=False, size=None):
         assert isinstance(input, LayerV2)
@@ -330,10 +319,15 @@ def mixed(size=0,
 class RecurrentLayerInput(WithExtraParent):
     def __init__(self, recurrent_name, index, parent_layers):
-        assert len(parent_layers) == 1
-        self.__parents__ = parent_layers.values()[0]
+        parents_len = len(parent_layers)
+        assert parents_len <= 1
+        if parents_len == 0:
+            self.__parents__ = []
+        else:
+            self.__parents__ = parent_layers.values()[0]
+        name = self.__parents__[index].name if index >= 0 else None
         super(RecurrentLayerInput, self).__init__(
-            name=self.__parents__[index].name, parent_layers=parent_layers)
+            name=name, parent_layers=parent_layers)
         self.__recurrent_name__ = recurrent_name

     def context_name(self):
...
@@ -346,6 +340,10 @@ class RecurrentLayerInput(WithExtraParent):
             in_links=map(lambda x: x.name, self.__parents__))
         return self

+    def use_context_name(self):
+        return True
+

 class RecurrentLayerOutput(Layer):
     def __init__(self, recurrent_name, index, parent_layers):
@@ -428,6 +426,9 @@ def recurrent_group(step, input, name=None):
     non_static_inputs = filter(lambda x: not isinstance(x, StaticInputV2),
                                input)
+    static_inputs = filter(lambda x: isinstance(x, StaticInputV2), input)
+    static_inputs = [static_input.input for static_input in static_inputs]
     actual_input = [
         RecurrentLayerInput(
             recurrent_name=name,
@@ -436,6 +437,13 @@ def recurrent_group(step, input, name=None):
         for i in xrange(len(non_static_inputs))
     ]

+    extra_input = None
+    if len(non_static_inputs) == 0:
+        extra_input = RecurrentLayerInput(
+            recurrent_name=name,
+            index=-1,
+            parent_layers={})
+
     def __real_step__(*args):
         rnn_input = list(args)
         static_inputs = filter(lambda x: isinstance(x, StaticInputV2), input)
@@ -443,6 +451,7 @@ def recurrent_group(step, input, name=None):
             mem_name = "__%s_memory__" % static_input.input.name
             mem = memory(
                 name=mem_name,
+                extra_input=extra_input,
                 is_seq=static_input.is_seq,
                 size=static_input.input.calculate_size,
                 boot_layer=static_input.input)
...
import beam_search
\ No newline at end of file
import paddle.v2 as paddle
from paddle.v2.config_base import Layer
from paddle.trainer_config_helpers.default_decorators import wrap_name_default
from paddle.trainer_config_helpers.layers import RecurrentLayerGroupSetGenerator, Generator


class BaseGeneratedInputV2(object):
    def __init__(self):
        self.bos_id = None
        self.eos_id = None

    def before_real_step(self):
        raise NotImplementedError()

    def after_real_step(self, *args):
        raise NotImplementedError()


class GeneratedInputV2(BaseGeneratedInputV2):
    def __init__(self, size, embedding_name, embedding_size):
        super(GeneratedInputV2, self).__init__()
        self.size = size
        self.embedding_name = embedding_name
        self.embedding_size = embedding_size

    def after_real_step(self, input):
        return paddle.layer.max_id(input=input, name='__beam_search_predict__')

    def before_real_step(self):
        predict_id = paddle.layer.memory(
            name='__beam_search_predict__',
            size=self.size,
            boot_with_const_id=self.bos_id)

        trg_emb = paddle.layer.embedding(
            input=predict_id,
            size=self.embedding_size,
            param_attr=paddle.attr.ParamAttr(name=self.embedding_name))
        return trg_emb
class RecurrentLayerGroupSetGeneratorV2(Layer):
    def __init__(self, eos_name, max_length, beam_size, num_results_per_sample):
        self.eos_name = eos_name
        self.max_length = max_length
        self.beam_size = beam_size
        self.num_results_per_sample = num_results_per_sample
        super(RecurrentLayerGroupSetGeneratorV2, self).__init__(
            name=eos_name, parent_layers={})

    def to_proto_impl(self, **kwargs):
        RecurrentLayerGroupSetGenerator(
            Generator(
                eos_layer_name=self.eos_name,
                max_num_frames=self.max_length,
                beam_size=self.beam_size,
                num_results_per_sample=self.num_results_per_sample))
        return self

    def context_name(self):
        return self.eos_name + ".fake"

    def use_context_name(self):
        return True
@wrap_name_default()
def beam_search(step,
                input,
                bos_id,
                eos_id,
                beam_size,
                max_length=500,
                name=None,
                num_results_per_sample=None):
    if num_results_per_sample is None:
        num_results_per_sample = beam_size
    assert num_results_per_sample <= beam_size
    # logger.warning("num_results_per_sample should be less than beam_size")

    if isinstance(input, paddle.layer.StaticInputV2) or isinstance(
            input, BaseGeneratedInputV2):
        input = [input]

    # Split the inputs: exactly one generated input drives the decoding loop,
    # the remaining static inputs are passed to the recurrent group as-is.
    generated_input_index = -1
    real_input = []
    for i, each_input in enumerate(input):
        assert isinstance(each_input, paddle.layer.StaticInputV2) or isinstance(
            each_input, BaseGeneratedInputV2)
        if isinstance(each_input, BaseGeneratedInputV2):
            assert generated_input_index == -1
            generated_input_index = i
        else:
            real_input.append(each_input)
    assert generated_input_index != -1

    gipt = input[generated_input_index]
    assert isinstance(gipt, BaseGeneratedInputV2)
    gipt.bos_id = bos_id
    gipt.eos_id = eos_id

    def __real_step__(*args):
        eos_name = "__%s_eos_layer__" % name
        generator = RecurrentLayerGroupSetGeneratorV2(
            eos_name, max_length, beam_size, num_results_per_sample)

        # Feed the embedding of the last generated word into the user step
        # function, then pick the most probable next word from its output.
        args = list(args)
        before_step_layer = gipt.before_real_step()
        before_step_layer.append_child(
            layer=generator, parent_names=[before_step_layer.name])
        args.insert(generated_input_index, before_step_layer)

        predict = gipt.after_real_step(step(*args))

        eos = paddle.layer.eos(input=predict, eos_id=eos_id, name=eos_name)
        predict.append_child(layer=eos, parent_names=[predict.name])

        return predict

    # tmp = paddle.layer.recurrent_group(
    #     step=__real_step__,
    #     input=real_input,
    #     reverse=False,
    #     name=name,
    #     is_generating=True)

    tmp = paddle.layer.recurrent_group(
        step=__real_step__,
        input=real_input,
        name=name)
    return tmp
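A sketch of the intended call pattern for this new module, mirroring the seqToseq_net change above (gru_decoder_with_attention, encoded_vector, encoded_proj and target_dict_dim are taken from that demo and assumed to be defined):

    import paddle.v2 as paddle
    import paddle.v2.layer.beam_search as beam_search

    # read-only encoder outputs plus one GeneratedInputV2 for the last predicted word
    group_inputs = [
        paddle.layer.StaticInputV2(input=encoded_vector, is_seq=True),
        paddle.layer.StaticInputV2(input=encoded_proj, is_seq=True),
        beam_search.GeneratedInputV2(
            size=target_dict_dim,
            embedding_name='_target_language_embedding',
            embedding_size=512),
    ]

    beam_gen = beam_search.beam_search(
        name="decoder_group",
        step=gru_decoder_with_attention,
        input=group_inputs,
        bos_id=0,         # id of the start mark, e.g. <s>
        eos_id=1,         # id of the end mark
        beam_size=3,
        max_length=250)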