提交 ea8013e4 编写于 作者: Q qiaolongfei

can run test.py for generating

上级 f7be384e
import sys
import paddle.v2 as paddle
import paddle.v2.layer.beam_search as beam_search
def seqToseq_net(source_dict_dim, target_dict_dim):
def seqToseq_net(source_dict_dim, target_dict_dim, is_generating):
### Network Architecture
word_vector_dim = 512 # dimension of word vector
decoder_size = 512 # dimension of hidden unit in GRU Decoder network
encoder_size = 512 # dimension of hidden unit in GRU Encoder network
beam_size = 3
max_length = 250
#### Encoder
src_word_id = paddle.layer.data(
name='source_language_word',
......@@ -67,30 +71,63 @@ def seqToseq_net(source_dict_dim, target_dict_dim):
group_input2 = paddle.layer.StaticInputV2(input=encoded_proj, is_seq=True)
group_inputs = [group_input1, group_input2]
trg_embedding = paddle.layer.embedding(
input=paddle.layer.data(
name='target_language_word',
type=paddle.data_type.integer_value_sequence(target_dict_dim)),
size=word_vector_dim,
param_attr=paddle.attr.ParamAttr(name='_target_language_embedding'))
group_inputs.append(trg_embedding)
# For decoder equipped with attention mechanism, in training,
# target embedding (the ground truth) is the data input,
# while the encoded source sequence is accessed as an unbounded memory.
# Here, the StaticInput defines a read-only memory
# for the recurrent_group.
decoder = paddle.layer.recurrent_group(
name=decoder_group_name,
step=gru_decoder_with_attention,
input=group_inputs)
lbl = paddle.layer.data(
name='target_language_next_word',
type=paddle.data_type.integer_value_sequence(target_dict_dim))
cost = paddle.layer.classification_cost(input=decoder, label=lbl)
return cost
if not is_generating:
trg_embedding = paddle.layer.embedding(
input=paddle.layer.data(
name='target_language_word',
type=paddle.data_type.integer_value_sequence(target_dict_dim)),
size=word_vector_dim,
param_attr=paddle.attr.ParamAttr(name='_target_language_embedding'))
group_inputs.append(trg_embedding)
# For decoder equipped with attention mechanism, in training,
# target embedding (the ground truth) is the data input,
# while the encoded source sequence is accessed as an unbounded memory.
# Here, the StaticInput defines a read-only memory
# for the recurrent_group.
decoder = paddle.layer.recurrent_group(
name=decoder_group_name,
step=gru_decoder_with_attention,
input=group_inputs)
lbl = paddle.layer.data(
name='target_language_next_word',
type=paddle.data_type.integer_value_sequence(target_dict_dim))
cost = paddle.layer.classification_cost(input=decoder, label=lbl)
return cost
else:
# In generation, the decoder predicts a next target word based on
# the encoded source sequence and the last generated target word.
# The encoded source sequence (encoder's output) must be specified by
# StaticInput, which is a read-only memory.
# Embedding of the last generated word is automatically obtained by
# GeneratedInputs, which is initialized by a start mark, such as <s>,
# and must be included in generation.
trg_embedding = beam_search.GeneratedInputV2(
size=target_dict_dim,
embedding_name='_target_language_embedding',
embedding_size=word_vector_dim)
group_inputs.append(trg_embedding)
beam_gen = beam_search.beam_search(
name=decoder_group_name,
step=gru_decoder_with_attention,
input=group_inputs,
bos_id=0,
eos_id=1,
beam_size=beam_size,
max_length=max_length)
#
# seqtext_printer_evaluator(
# input=beam_gen,
# id_input=data_layer(
# name="sent_id", size=1),
# dict_file=trg_dict_path,
# result_file=gen_trans_file)
return beam_gen
def main():
......
......@@ -81,8 +81,10 @@ def gru_encoder_decoder(data_conf,
"""
for k, v in data_conf.iteritems():
globals()[k] = v
source_dict_dim = len(open(src_dict_path, "r").readlines())
target_dict_dim = len(open(trg_dict_path, "r").readlines())
#source_dict_dim = len(open(src_dict_path, "r").readlines())
#target_dict_dim = len(open(trg_dict_path, "r").readlines())
source_dict_dim = 1000
target_dict_dim = 2000
gen_trans_file = gen_result
src_word_id = data_layer(name='source_language_word', size=source_dict_dim)
......@@ -131,9 +133,8 @@ def gru_encoder_decoder(data_conf,
decoder_group_name = "decoder_group"
group_inputs = [
StaticInput(
input=encoded_vector, is_seq=True), StaticInput(
input=encoded_proj, is_seq=True)
StaticInput(input=encoded_vector, is_seq=True),
StaticInput(input=encoded_proj, is_seq=True)
]
if not is_generating:
......
......@@ -19,7 +19,8 @@ sys.path.append("..")
from seqToseq_net import *
# whether this config is used for generating
is_generating = False
#is_generating = False
is_generating = True
### Data Definition
data_dir = "./data/pre-wmt14"
......
......@@ -76,6 +76,10 @@ class Layer(object):
"""
function to set proto attribute
"""
print "======"
# print self.name
print self.__parent_layers__
# print self.__context__
self.__context__ = context
# short cut if myself is parsed before.
......
......@@ -135,6 +135,10 @@ class WithExtraParent(Layer):
"""
function to set proto attribute
"""
print "*************"
# print context
print self.name
print self.__extra_parent__
kwargs = dict()
for p in self.__extra_parent__:
p.to_proto(context=context)
......@@ -162,11 +166,12 @@ class WithExtraParent(Layer):
class MemoryV2(WithExtraParent):
def __init__(self, name, **kwargs):
def __init__(self, name, extra_input=None, **kwargs):
self.name = name
super(MemoryV2, self).__init__(name=name, parent_layers=dict())
self.__kwargs__ = kwargs
self.__boot_layer_name__ = None
if 'boot_layer' in kwargs:
begin_of_current_rnn = []
# TODO(yuyang18): Fix inspect, it could be wrong when user invoke a
......@@ -223,22 +228,6 @@ class MemoryV2(WithExtraParent):
return True
class LayerOutputV2(Layer):
    """
    Wrapper that stores an already-built v1 ``LayerOutput``.

    The wrapper registers no parent layers: the wrapped ``layer_output`` has
    been parsed already, so there is nothing left to resolve.
    """

    def __init__(self, layer_output):
        """
        :param layer_output: the v1 result to wrap; must be a
                             ``conf_helps.LayerOutput`` instance.
        """
        assert isinstance(layer_output, conf_helps.LayerOutput)
        self.layer_output = layer_output
        # Reuse the wrapped layer's name and pass an empty parent mapping.
        super(LayerOutputV2, self).__init__(
            name=layer_output.name, parent_layers={})

    def to_proto_impl(self):
        """Return the stored v1 ``LayerOutput`` unchanged."""
        return self.layer_output
class StaticInputV2(object):
def __init__(self, input, is_seq=False, size=None):
assert isinstance(input, LayerV2)
......@@ -330,10 +319,15 @@ def mixed(size=0,
class RecurrentLayerInput(WithExtraParent):
def __init__(self, recurrent_name, index, parent_layers):
assert len(parent_layers) == 1
self.__parents__ = parent_layers.values()[0]
parents_len = len(parent_layers)
assert parents_len <= 1
if parents_len == 0:
self.__parents__ = []
else:
self.__parents__ = parent_layers.values()[0]
name = self.__parents__[index].name if index >= 0 else None
super(RecurrentLayerInput, self).__init__(
name=self.__parents__[index].name, parent_layers=parent_layers)
name=name, parent_layers=parent_layers)
self.__recurrent_name__ = recurrent_name
def context_name(self):
......@@ -346,6 +340,10 @@ class RecurrentLayerInput(WithExtraParent):
in_links=map(lambda x: x.name, self.__parents__))
return self
def use_context_name(self):
return True
class RecurrentLayerOutput(Layer):
def __init__(self, recurrent_name, index, parent_layers):
......@@ -428,6 +426,9 @@ def recurrent_group(step, input, name=None):
non_static_inputs = filter(lambda x: not isinstance(x, StaticInputV2),
input)
static_inputs = filter(lambda x: isinstance(x, StaticInputV2), input)
static_inputs = [static_input.input for static_input in static_inputs]
actual_input = [
RecurrentLayerInput(
recurrent_name=name,
......@@ -436,6 +437,13 @@ def recurrent_group(step, input, name=None):
for i in xrange(len(non_static_inputs))
]
extra_input = None
if len(non_static_inputs) == 0:
extra_input = RecurrentLayerInput(
recurrent_name=name,
index=-1,
parent_layers={})
def __real_step__(*args):
rnn_input = list(args)
static_inputs = filter(lambda x: isinstance(x, StaticInputV2), input)
......@@ -443,6 +451,7 @@ def recurrent_group(step, input, name=None):
mem_name = "__%s_memory__" % static_input.input.name
mem = memory(
name=mem_name,
extra_input=extra_input,
is_seq=static_input.is_seq,
size=static_input.input.calculate_size,
boot_layer=static_input.input)
......
import beam_search
\ No newline at end of file
import paddle.v2 as paddle
from paddle.v2.config_base import Layer
from paddle.trainer_config_helpers.default_decorators import wrap_name_default
from paddle.trainer_config_helpers.layers import RecurrentLayerGroupSetGenerator, Generator
class BaseGeneratedInputV2(object):
    """
    Base class for inputs that are produced by the generation loop itself.

    Subclasses must implement both hooks; ``bos_id`` and ``eos_id`` start as
    ``None`` and are expected to be assigned by the caller before the hooks
    are used.
    """

    def __init__(self):
        # Both ids are unset until a caller fills them in.
        self.eos_id = None
        self.bos_id = None

    def before_real_step(self):
        """Hook invoked before the user step; must be overridden."""
        raise NotImplementedError()

    def after_real_step(self, *args):
        """Hook invoked on the user step's result; must be overridden."""
        raise NotImplementedError()
class GeneratedInputV2(BaseGeneratedInputV2):
    """
    Generated input that feeds the previously predicted word id back into
    the step function as an embedding.

    :param size: size passed to the memory layer that holds the predicted id.
    :param embedding_name: parameter name used for the embedding lookup.
    :param embedding_size: output size of the embedding layer.
    """

    # The memory in before_real_step and the max_id layer in after_real_step
    # deliberately share this name.
    # NOTE(review): presumably this is what links the memory to the previous
    # step's prediction -- confirm against paddle.layer.memory semantics.
    _PREDICT_LAYER_NAME = '__beam_search_predict__'

    def __init__(self, size, embedding_name, embedding_size):
        super(GeneratedInputV2, self).__init__()
        self.embedding_size = embedding_size
        self.embedding_name = embedding_name
        self.size = size

    def before_real_step(self):
        """
        Build the embedding of the last predicted id.

        Creates a memory layer booted with the constant id ``self.bos_id``
        and looks it up in the embedding named ``self.embedding_name``.

        :return: the embedding layer.
        """
        last_prediction = paddle.layer.memory(
            name=self._PREDICT_LAYER_NAME,
            size=self.size,
            boot_with_const_id=self.bos_id)
        embedding_attr = paddle.attr.ParamAttr(name=self.embedding_name)
        return paddle.layer.embedding(
            input=last_prediction,
            size=self.embedding_size,
            param_attr=embedding_attr)

    def after_real_step(self, input):
        """
        Apply ``max_id`` to the step output.

        :param input: the layer returned by the user step function.
        :return: the ``max_id`` layer, named ``__beam_search_predict__``.
        """
        return paddle.layer.max_id(
            input=input, name=self._PREDICT_LAYER_NAME)
class RecurrentLayerGroupSetGeneratorV2(Layer):
    """
    v2 layer object that, when converted to proto, registers the generator
    settings through the v1 ``RecurrentLayerGroupSetGenerator`` helper.

    :param eos_name: name of the end-of-sequence layer; also used as this
                     layer's name.
    :param max_length: forwarded as ``max_num_frames``.
    :param beam_size: forwarded as ``beam_size``.
    :param num_results_per_sample: forwarded as ``num_results_per_sample``.
    """

    def __init__(self, eos_name, max_length, beam_size, num_results_per_sample):
        # Attributes are assigned before the base initializer runs, as in
        # the original ordering.
        self.eos_name = eos_name
        self.max_length = max_length
        self.beam_size = beam_size
        self.num_results_per_sample = num_results_per_sample
        super(RecurrentLayerGroupSetGeneratorV2, self).__init__(
            name=eos_name, parent_layers={})

    def to_proto_impl(self, **kwargs):
        """
        Register the generator configuration and return ``self``.

        Any keyword arguments are accepted and ignored.
        """
        generator_conf = Generator(
            eos_layer_name=self.eos_name,
            max_num_frames=self.max_length,
            beam_size=self.beam_size,
            num_results_per_sample=self.num_results_per_sample)
        RecurrentLayerGroupSetGenerator(generator_conf)
        return self

    def context_name(self):
        """Return the eos layer name with a ``.fake`` suffix."""
        return self.eos_name + ".fake"

    def use_context_name(self):
        """Always ``True``: this layer is keyed by ``context_name()``."""
        return True
@wrap_name_default()
def beam_search(step,
                input,
                bos_id,
                eos_id,
                beam_size,
                max_length=500,
                name=None,
                num_results_per_sample=None):
    """
    Build a generating recurrent group around ``step``.

    :param step: step function; called with the non-generated inputs plus
                 the generated input's pre-step layer inserted at the
                 position the generated input occupied in ``input``.
    :param input: a single ``StaticInputV2`` / ``BaseGeneratedInputV2`` or a
                  list of them. Exactly one ``BaseGeneratedInputV2`` must be
                  present.
    :param bos_id: id stored on the generated input as ``bos_id``.
    :param eos_id: id stored on the generated input as ``eos_id`` and passed
                   to the eos layer.
    :param beam_size: beam width forwarded to the generator settings.
    :param max_length: forwarded to the generator settings.
    :param name: name of the recurrent group; also used to derive the eos
                 layer name.
    :param num_results_per_sample: defaults to ``beam_size`` and must not
                                   exceed it.
    :return: the layer returned by ``paddle.layer.recurrent_group``.
    """
    accepted_types = (paddle.layer.StaticInputV2, BaseGeneratedInputV2)

    if num_results_per_sample is None:
        num_results_per_sample = beam_size
    # num_results_per_sample should not be larger than beam_size.
    assert num_results_per_sample <= beam_size

    # Accept a bare input as shorthand for a one-element list.
    if isinstance(input, accepted_types):
        input = [input]

    # Split the inputs: remember where the single generated input sits and
    # collect everything else for the recurrent group.
    generated_input_index = -1
    real_input = []
    for position, candidate in enumerate(input):
        assert isinstance(candidate, accepted_types)
        if not isinstance(candidate, BaseGeneratedInputV2):
            real_input.append(candidate)
            continue
        assert generated_input_index == -1
        generated_input_index = position

    assert generated_input_index != -1

    gipt = input[generated_input_index]
    assert isinstance(gipt, BaseGeneratedInputV2)

    gipt.bos_id = bos_id
    gipt.eos_id = eos_id

    def __real_step__(*args):
        eos_name = "__%s_eos_layer__" % name
        generator = RecurrentLayerGroupSetGeneratorV2(
            eos_name, max_length, beam_size, num_results_per_sample)

        step_args = list(args)
        previous_word = gipt.before_real_step()
        # Attach the generator settings as a child of the pre-step layer.
        previous_word.append_child(
            layer=generator, parent_names=[previous_word.name])
        step_args.insert(generated_input_index, previous_word)

        predict = gipt.after_real_step(step(*step_args))

        # The eos layer is attached as a child of the prediction layer.
        eos = paddle.layer.eos(input=predict, eos_id=eos_id, name=eos_name)
        predict.append_child(layer=eos, parent_names=[predict.name])

        return predict

    # NOTE: an earlier draft of this call also passed reverse=False and
    # is_generating=True; they are not passed here.
    return paddle.layer.recurrent_group(
        step=__real_step__, input=real_input, name=name)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册