Commit 0a326f39 authored by guosheng

Update seq2seq to adapt to latest code.

Parent 38fd12ef
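The commit's common thread is an import migration: each script previously prepended the repository root to `sys.path` and imported sibling modules (`model`, `text`, `metrics`, `callbacks`) directly; after this change the same symbols come from the installed `hapi` package. A minimal before/after sketch of the pattern (module names are taken from the hunks below; the surrounding script is hypothetical):

```python
# Before: sibling-module imports, made resolvable by a sys.path hack.
# import sys, os
# sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# from model import Input, set_device
# from text import DynamicDecode, RNN, BasicLSTMCell, RNNCell

# After: the same symbols are imported from the installed hapi package,
# so no path manipulation is needed.
from hapi.model import Input, set_device
from hapi.text import DynamicDecode, RNN, BasicLSTMCell, RNNCell
```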
@@ -15,8 +15,6 @@
 import logging
 import os
 import io
-import sys
-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 import random
 from functools import partial
@@ -25,10 +23,10 @@ import paddle.fluid as fluid
 from paddle.fluid.layers.utils import flatten
 from paddle.fluid.io import DataLoader
-from model import Input, set_device
+from hapi.model import Input, set_device
 from args import parse_args
 from seq2seq_base import BaseInferModel
-from seq2seq_attn import AttentionInferModel, AttentionGreedyInferModel
+from seq2seq_attn import AttentionInferModel
 from reader import Seq2SeqDataset, Seq2SeqBatchSampler, SortType, prepare_infer_input
@@ -87,8 +85,7 @@ def do_predict(args):
         num_workers=0,
         return_list=True)

-    # model_maker = AttentionInferModel if args.attention else BaseInferModel
-    model_maker = AttentionGreedyInferModel if args.attention else BaseInferModel
+    model_maker = AttentionInferModel if args.attention else BaseInferModel
     model = model_maker(
         args.src_vocab_size,
         args.tar_vocab_size,
...
@@ -19,8 +19,9 @@ from paddle.fluid.initializer import UniformInitializer
 from paddle.fluid.dygraph import Embedding, Linear, Layer
 from paddle.fluid.layers import BeamSearchDecoder
-from text import DynamicDecode, RNN, BasicLSTMCell, RNNCell
-from model import Model, Loss
+from hapi.model import Model, Loss
+from hapi.text import DynamicDecode, RNN, BasicLSTMCell, RNNCell
 from seq2seq_base import Encoder
@@ -238,92 +239,3 @@ class AttentionInferModel(AttentionModel):
             encoder_output=encoder_output,
             encoder_padding_mask=encoder_padding_mask)
         return rs
-
-
-class GreedyEmbeddingHelper(fluid.layers.GreedyEmbeddingHelper):
-    def __init__(self, embedding_fn, start_tokens, end_token):
-        if isinstance(start_tokens, int):
-            self.need_convert_start_tokens = True
-            self.start_token_value = start_tokens
-        super(GreedyEmbeddingHelper, self).__init__(embedding_fn, start_tokens,
-                                                    end_token)
-        self.end_token = fluid.layers.create_global_var(
-            shape=[1], dtype="int64", value=end_token, persistable=True)
-
-    def initialize(self, batch_ref=None):
-        if getattr(self, "need_convert_start_tokens", False):
-            assert batch_ref is not None, (
-                "Need to give batch_ref to get batch size "
-                "to initialize the tensor for start tokens.")
-            self.start_tokens = fluid.layers.fill_constant_batch_size_like(
-                input=fluid.layers.utils.flatten(batch_ref)[0],
-                shape=[-1],
-                dtype="int64",
-                value=self.start_token_value,
-                input_dim_idx=0)
-        return super(GreedyEmbeddingHelper, self).initialize()
-
-
-class BasicDecoder(fluid.layers.BasicDecoder):
-    def initialize(self, initial_cell_states):
-        (initial_inputs,
-         initial_finished) = self.helper.initialize(initial_cell_states)
-        return initial_inputs, initial_cell_states, initial_finished
-
-
-class AttentionGreedyInferModel(AttentionModel):
-    def __init__(self,
-                 src_vocab_size,
-                 trg_vocab_size,
-                 embed_dim,
-                 hidden_size,
-                 num_layers,
-                 dropout_prob=0.,
-                 bos_id=0,
-                 eos_id=1,
-                 beam_size=1,
-                 max_out_len=256):
-        args = dict(locals())
-        args.pop("self")
-        args.pop("__class__", None)  # py3
-        args.pop("beam_size", None)
-        self.bos_id = args.pop("bos_id")
-        self.eos_id = args.pop("eos_id")
-        self.max_out_len = args.pop("max_out_len")
-        super(AttentionGreedyInferModel, self).__init__(**args)
-        # dynamic decoder for inference
-        decoder_helper = GreedyEmbeddingHelper(
-            start_tokens=bos_id,
-            end_token=eos_id,
-            embedding_fn=self.decoder.embedder)
-        decoder = BasicDecoder(
-            cell=self.decoder.lstm_attention.cell,
-            helper=decoder_helper,
-            output_fn=self.decoder.output_layer)
-        self.greedy_search_decoder = DynamicDecode(
-            decoder, max_step_num=max_out_len, is_test=True)
-
-    def forward(self, src, src_length):
-        # encoding
-        encoder_output, encoder_final_state = self.encoder(src, src_length)
-        # decoder initial states
-        decoder_initial_states = [
-            encoder_final_state,
-            self.decoder.lstm_attention.cell.get_initial_states(
-                batch_ref=encoder_output, shape=[self.hidden_size])
-        ]
-        # attention mask to avoid paying attention on paddings
-        src_mask = layers.sequence_mask(
-            src_length,
-            maxlen=layers.shape(src)[1],
-            dtype=encoder_output.dtype)
-        encoder_padding_mask = (src_mask - 1.0) * 1e9
-        encoder_padding_mask = layers.unsqueeze(encoder_padding_mask, [1])
-        # dynamic decoding with greedy search
-        rs, _ = self.greedy_search_decoder(
-            inits=decoder_initial_states,
-            encoder_output=encoder_output,
-            encoder_padding_mask=encoder_padding_mask)
-        return rs.sample_ids
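The deleted greedy classes duplicated behaviour the surviving beam-search path already provides, since greedy decoding is beam search with a beam width of 1. A hedged sketch of recovering greedy behaviour with the kept `AttentionInferModel`; the argument list is inferred from the removed `AttentionGreedyInferModel.__init__` signature above, and all sizes are placeholders:

```python
# Hypothetical sizes; beam_size=1 makes beam search keep only the single
# most probable hypothesis at every step, i.e. greedy search.
model = AttentionInferModel(
    src_vocab_size=10000,
    trg_vocab_size=10000,
    embed_dim=512,
    hidden_size=512,
    num_layers=2,
    dropout_prob=0.2,
    bos_id=0,
    eos_id=1,
    beam_size=1,
    max_out_len=256)
```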
@@ -18,8 +18,9 @@ from paddle.fluid import ParamAttr
 from paddle.fluid.initializer import UniformInitializer
 from paddle.fluid.dygraph import Embedding, Linear, Layer
 from paddle.fluid.layers import BeamSearchDecoder
-from text import DynamicDecode, RNN, BasicLSTMCell, RNNCell
-from model import Model, Loss
+from hapi.model import Model, Loss
+from hapi.text import DynamicDecode, RNN, BasicLSTMCell, RNNCell


 class CrossEntropyCriterion(Loss):
@@ -200,44 +201,3 @@ class BaseInferModel(BaseModel):
         # dynamic decoding with beam search
         rs, _ = self.beam_search_decoder(inits=encoder_final_states)
         return rs
-
-
-class BaseGreedyInferModel(BaseModel):
-    def __init__(self,
-                 src_vocab_size,
-                 trg_vocab_size,
-                 embed_dim,
-                 hidden_size,
-                 num_layers,
-                 dropout_prob=0.,
-                 bos_id=0,
-                 eos_id=1,
-                 beam_size=1,
-                 max_out_len=256):
-        args = dict(locals())
-        args.pop("self")
-        args.pop("__class__", None)  # py3
-        args.pop("beam_size", None)
-        self.bos_id = args.pop("bos_id")
-        self.eos_id = args.pop("eos_id")
-        self.max_out_len = args.pop("max_out_len")
-        super(BaseGreedyInferModel, self).__init__(**args)
-        # dynamic decoder for inference
-        decoder_helper = GreedyEmbeddingHelper(
-            start_tokens=bos_id,
-            end_token=eos_id,
-            embedding_fn=self.decoder.embedder)
-        decoder = BasicDecoder(
-            cell=self.decoder.stack_lstm.cell,
-            helper=decoder_helper,
-            output_fn=self.decoder.output_layer)
-        self.greedy_search_decoder = DynamicDecode(
-            decoder, max_step_num=max_out_len, is_test=True)
-
-    def forward(self, src, src_length):
-        # encoding
-        encoder_output, encoder_final_states = self.encoder(src, src_length)
-        # dynamic decoding with greedy search
-        rs, _ = self.greedy_search_decoder(inits=encoder_final_states)
-        return rs.sample_ids
@@ -14,8 +14,6 @@
 import logging
 import os
-import sys
-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 import random
 from functools import partial
@@ -23,9 +21,7 @@ import numpy as np
 import paddle.fluid as fluid
 from paddle.fluid.io import DataLoader
-from model import Input, set_device
-from metrics import Metric
-from callbacks import ProgBarLogger
+from hapi.model import Input, set_device
 from args import parse_args
 from seq2seq_base import BaseModel, CrossEntropyCriterion
 from seq2seq_attn import AttentionModel
...
@@ -15,8 +15,8 @@
 import numpy as np
 import paddle.fluid as fluid
-from metrics import Metric
-from callbacks import ProgBarLogger
+from hapi.metrics import Metric
+from hapi.callbacks import ProgBarLogger


 class TrainCallback(ProgBarLogger):
...
@@ -238,8 +238,9 @@ class BasicLSTMCell(RNNCell):
         self._bias_attr = bias_attr
         self._gate_activation = gate_activation or layers.sigmoid
         self._activation = activation or layers.tanh
-        self._forget_bias = layers.fill_constant(
-            [1], dtype=dtype, value=forget_bias)
+        # TODO(guosheng): find better way to resolve constants in __init__
+        self._forget_bias = layers.create_global_var(
+            shape=[1], dtype=dtype, value=forget_bias, persistable=True)
         self._forget_bias.stop_gradient = False
         self._dtype = dtype
         self._input_size = input_size
...
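For context on the last hunk: `layers.fill_constant` appends a fill op to whatever program is being constructed when `__init__` runs, whereas `layers.create_global_var` registers a persistable variable once, so the constant remains usable outside the program that created it (hence the TODO about resolving constants in `__init__`). A minimal sketch of the new pattern, assuming the same fluid API used in the diff:

```python
import paddle.fluid.layers as layers

# Persistable one-element tensor holding the forget-gate bias; created once
# at construction time, mirroring what the diff does for _forget_bias.
forget_bias = layers.create_global_var(
    shape=[1], dtype="float32", value=1.0, persistable=True)
forget_bias.stop_gradient = False  # the diff keeps gradients flowing through it
```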