Unverified commit e7d44a20, authored by Qiao Longfei, committed by GitHub

Nmt model (#7340)

Neural machine translation model: support beam search with the while op.
Parent commit: d8b923ab
@@ -22,7 +22,7 @@ The current `LoDTensor` is designed to store levels of variable-length sequences
 The integers in each level represent the begin and end (not inclusive) offset of a sequence **in the underlying tensor**,
 let's call this format the **absolute-offset LoD** for clarity.
 
-The relative-offset LoD can retrieve any sequence very quickly but fails to represent empty sequences, for example, a two-level LoD is as follows
+The absolute-offset LoD can retrieve any sequence very quickly but fails to represent empty sequences, for example, a two-level LoD is as follows
 ```python
 [[0, 3, 9]
  [0, 2, 3, 3, 3, 9]]
@@ -119,7 +119,7 @@ def generate():
     encoder_ctx_expanded = pd.lod_expand(encoder_ctx, target_word)
     decoder_input = pd.fc(
         act=pd.activation.Linear(),
-        input=[target_word, encoder_ctx],
+        input=[target_word, encoder_ctx_expanded],
         size=3 * decoder_dim)
     gru_out, cur_mem = pd.gru_step(
         decoder_input, mem=decoder_mem, size=decoder_dim)
......
@@ -116,8 +116,9 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id,
   for (auto& op_desc : block.AllOps()) {
     auto op = paddle::framework::OpRegistry::CreateOp(*op_desc);
-    VLOG(3) << op->DebugStringEx(local_scope);
+    VLOG(4) << op->DebugStringEx(local_scope);
     op->Run(*local_scope, place_);
+    VLOG(3) << op->DebugStringEx(local_scope);
     if (FLAGS_do_memory_benchmark) {
       VLOG(2) << "Memory used after operator " + op->Type() + " running: "
               << memory::memory_usage(place_);
......
@@ -107,9 +107,10 @@ LoD ToAbsOffset(const LoD &in) {
   // the lowest level stores relative offsets
   if (in.empty() || in.size() == 1) return in;
   LoD result = in;
-  for (int level = result.size() - 2; level >= 0; level--) {
-    for (auto &ele : result[level]) {
-      ele = result[level + 1][ele];
+  for (auto level = static_cast<int>(in.size() - 2); level >= 0; level--) {
+    for (size_t i = 0; i < in[level].size(); ++i) {
+      size_t index = in[level][i];
+      result[level][i] = result[level + 1][index];
     }
   }
   return result;
......
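For illustration, a minimal Python sketch of what `ToAbsOffset` computes, mirroring the rewritten C++ loop above; the function name and sample LoD are illustrative, not part of the commit:

```python
# Sketch of ToAbsOffset in plain Python: convert every upper level,
# working from the second-lowest level upward, by mapping each entry
# through the (already absolute) level below it.
def to_abs_offset(lod):
    if len(lod) <= 1:
        return lod
    result = [level[:] for level in lod]
    for level in range(len(lod) - 2, -1, -1):
        for i, index in enumerate(lod[level]):
            result[level][i] = result[level + 1][index]
    return result

# The top level [0, 2, 4] indexes the lower level; after conversion it
# holds absolute offsets into the underlying tensor, so any sequence's
# span is a single lookup.
print(to_abs_offset([[0, 2, 4], [0, 3, 5, 9, 10]]))
# [[0, 5, 10], [0, 3, 5, 9, 10]]
```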
@@ -24,8 +24,18 @@ namespace operators {
 void BeamSearch::operator()(const framework::LoDTensor &pre_ids,
                             framework::LoDTensor *selected_ids,
                             framework::LoDTensor *selected_scores) {
+  auto abs_lod = framework::ToAbsOffset(ids_->lod());
+  auto &high_level = abs_lod[lod_level_];
+
   auto items = SelectTopBeamSizeItems();
-  auto selected_items = ToMap(items);
+  auto selected_items = ToMap(items, high_level.back());
+  VLOG(3) << "selected_items:";
+  for (size_t i = 0; i < selected_items.size(); ++i) {
+    VLOG(3) << "offset:" << i;
+    for (auto &item : selected_items[i]) {
+      VLOG(3) << ItemToString(item);
+    }
+  }
   PruneEndidCandidates(pre_ids, &selected_items);
   // calculate the output tensor's height
   size_t num_instances = std::accumulate(
@@ -63,11 +73,12 @@ void BeamSearch::operator()(const framework::LoDTensor &pre_ids,
   low_level.push_back(low_offset);
 
   // fill lod
-  auto abs_lod = framework::ToAbsOffset(ids_->lod());
-  auto &high_level = abs_lod[lod_level_];
   framework::LoD lod(2);
   lod[0].assign(high_level.begin(), high_level.end());
   lod[1].assign(low_level.begin(), low_level.end());
+  if (!framework::CheckLoD(lod)) {
+    PADDLE_THROW("lod %s is not right", framework::LoDToString(lod));
+  }
   selected_ids->set_lod(lod);
   selected_scores->set_lod(lod);
 }
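As a reading aid, a hypothetical example of the two-level LoD set on the outputs above, assuming two source sentences with two surviving prefixes each and one selected id per prefix (the exact grouping is an assumption, not stated in the diff):

```python
# Hypothetical output LoD for selected_ids / selected_scores: the high
# level groups output rows by source sentence, the low level groups
# them by surviving beam prefix.
lod = [[0, 2, 4],        # source 0 owns prefixes [0, 2), source 1 owns [2, 4)
       [0, 1, 2, 3, 4]]  # each prefix contributed one selected id
```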
@@ -90,13 +101,11 @@ int BeamSearch::PruneEndidCandidates(const framework::LoDTensor &pre_ids,
 }
 
 std::vector<std::vector<BeamSearch::Item>> BeamSearch::ToMap(
-    const std::vector<std::vector<Item>> &items) {
+    const std::vector<std::vector<Item>> &items, size_t element_num) {
   std::vector<std::vector<Item>> result;
+  result.resize(element_num);
   for (auto &entries : items) {
     for (const auto &item : entries) {
-      if (item.offset >= result.size()) {
-        result.resize(item.offset + 1);
-      }
       result[item.offset].push_back(item);
     }
   }
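What the reworked `ToMap` does, sketched in plain Python: pre-sizing to `element_num` keeps a slot, possibly empty, for every source position, which the old grow-on-demand version could silently drop at the tail. The helper below is illustrative only:

```python
# Bucket beam candidates by their source offset; pre-size the result so
# sources whose candidates were all pruned still get an (empty) bucket.
def to_map(items, element_num):
    result = [[] for _ in range(element_num)]
    for entries in items:
        for offset, item in entries:
            result[offset].append(item)
    return result

# Two sources; source 1 has no surviving candidates but keeps its slot.
print(to_map([[(0, 'a'), (0, 'b')]], element_num=2))  # [['a', 'b'], []]
```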
@@ -122,6 +131,14 @@ BeamSearch::SelectTopBeamSizeItems() {
     }
     result.emplace_back(items);
   }
+  VLOG(3) << "SelectTopBeamSizeItems result size " << result.size();
+  for (auto &items : result) {
+    VLOG(3) << "item set:";
+    for (auto &item : items) {
+      VLOG(3) << ItemToString(item);
+    }
+  }
 
   return result;
 }
@@ -159,6 +176,22 @@ bool BeamSearch::NextItemSet(std::vector<BeamSearch::Item> *items) {
   return true;
 }
 
+std::ostream &operator<<(std::ostream &os, const BeamSearch::Item &item) {
+  os << "{";
+  os << "offset: " << item.offset << ", ";
+  os << "id: " << item.id << ", ";
+  os << "score: " << item.score << "";
+  os << "}";
+  return os;
+}
+
+std::string ItemToString(const BeamSearch::Item &item) {
+  std::ostringstream stream;
+  stream << item;
+  return stream.str();
+}
+
 class BeamSearchProtoAndCheckerMaker
     : public framework::OpProtoAndCheckerMaker {
  public:
@@ -186,8 +219,40 @@ class BeamSearchProtoAndCheckerMaker
   }
 };
 
+class BeamSearchInferShape : public framework::InferShapeBase {
+ public:
+  void operator()(framework::InferShapeContext *context) const override {
+    for (const std::string &arg :
+         std::vector<std::string>({"pre_ids", "ids", "scores"})) {
+      PADDLE_ENFORCE(context->HasInput(arg),
+                     "BeamSearch need input argument '%s'", arg);
+    }
+    for (const std::string &arg :
+         std::vector<std::string>({"selected_ids", "selected_scores"})) {
+      PADDLE_ENFORCE(context->HasOutput(arg),
+                     "BeamSearch need output argument '%s'", arg);
+    }
+  }
+};
+
+class BeamSearchInferVarType : public framework::VarTypeInference {
+ public:
+  void operator()(const framework::OpDesc &op_desc,
+                  framework::BlockDesc *block) const override {
+    for (auto &o : op_desc.Output("selected_ids")) {
+      block->Var(o)->SetType(framework::proto::VarDesc::LOD_TENSOR);
+    }
+    for (auto &o : op_desc.Output("selected_scores")) {
+      block->Var(o)->SetType(framework::proto::VarDesc::LOD_TENSOR);
+    }
+  }
+};
+
 }  // namespace operators
 }  // namespace paddle
 
-REGISTER_OP_WITHOUT_GRADIENT(beam_search, paddle::operators::BeamSearchOp,
-                             paddle::operators::BeamSearchProtoAndCheckerMaker);
+REGISTER_OPERATOR(beam_search, paddle::operators::BeamSearchOp,
+                  paddle::operators::BeamSearchProtoAndCheckerMaker,
+                  paddle::operators::BeamSearchInferShape,
+                  paddle::operators::BeamSearchInferVarType,
+                  paddle::framework::EmptyGradOpMaker);
@@ -136,8 +136,6 @@ class BeamSearch {
   void operator()(const framework::LoDTensor& pre_ids,
                   framework::LoDTensor* selected_ids,
                   framework::LoDTensor* selected_scores);
 
- protected:
   /*
    * The basic items help to sort.
    */
@@ -155,6 +153,7 @@ class BeamSearch {
     score_t score;
   };
 
+ protected:
   /*
    * Delete all the records that follows the end token.
    */
@@ -166,7 +165,7 @@ class BeamSearch {
    * NOTE low performance
    */
   std::vector<std::vector<Item>> ToMap(
-      const std::vector<std::vector<Item>>& inputs);
+      const std::vector<std::vector<Item>>& inputs, size_t element_num);
 
   /*
    * For each source, select top beam_size records.
@@ -187,6 +186,10 @@ class BeamSearch {
   int end_id_{0};
 };
 
+std::ostream& operator<<(std::ostream& os, const BeamSearch::Item& item);
+
+std::string ItemToString(const BeamSearch::Item& item);
+
 class BeamSearchOp : public framework::OperatorBase {
  public:
   BeamSearchOp(const std::string& type,
@@ -203,7 +206,6 @@ class BeamSearchOp : public framework::OperatorBase {
   void Run(const framework::Scope& scope,
            const platform::Place& dev_place) const override {
-    LOG(INFO) << "run beam search op";
     auto ids_var = scope.FindVar(Input("ids"));
     auto scores_var = scope.FindVar(Input("scores"));
     auto pre_ids_var = scope.FindVar(Input("pre_ids"));
@@ -217,10 +219,8 @@ class BeamSearchOp : public framework::OperatorBase {
     size_t level = Attr<int>("level");
     size_t beam_size = Attr<int>("beam_size");
     int end_id = Attr<int>("end_id");
-    LOG(INFO) << "init beam search";
     BeamSearch alg(ids, scores, level, beam_size, end_id);
-    LOG(INFO) << "after beam search";
 
     auto selected_ids_var = scope.FindVar(Output("selected_ids"));
     auto selected_scores_var = scope.FindVar(Output("selected_scores"));
     PADDLE_ENFORCE_NOT_NULL(selected_ids_var);
@@ -229,9 +229,7 @@ class BeamSearchOp : public framework::OperatorBase {
         *selected_ids_var->GetMutable<framework::LoDTensor>();
     auto& selected_scores_tensor =
         *selected_scores_var->GetMutable<framework::LoDTensor>();
-    LOG(INFO) << "run beam search";
     alg(pre_ids, &selected_ids_tensor, &selected_scores_tensor);
-    LOG(INFO) << "finish beam search";
   }
 };
......
@@ -32,6 +32,7 @@ class SequenceExpandKernel : public framework::OpKernel<T> {
     const T* x_data = x->data<T>();
     auto x_dims = x->dims();
     auto* y = context.Input<LoDTensor>("Y");
+    PADDLE_ENFORCE(!y->lod().empty(), "y should have lod");
     PADDLE_ENFORCE_EQ(static_cast<size_t>(x_dims[0]),
                       y->lod().back().size() - 1,
                       "The size of last lod level in Input(Y)"
......
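For context, the expansion rule `sequence_expand` applies, reduced to plain Python; this shows why the new check matters, since without a LoD on `Y` there is nothing to drive the expansion. The helper below is an illustration, not the kernel itself:

```python
# Row i of x is repeated to the length of the i-th sequence in the last
# LoD level of y.
def sequence_expand(x_rows, y_last_lod):
    out = []
    for i in range(len(y_last_lod) - 1):
        out.extend([x_rows[i]] * (y_last_lod[i + 1] - y_last_lod[i]))
    return out

print(sequence_expand(['a', 'b'], [0, 2, 5]))  # ['a', 'a', 'b', 'b', 'b']
```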
@@ -22,6 +22,7 @@ namespace paddle {
 namespace operators {
 
 using Tensor = framework::Tensor;
+using LoDTensor = framework::LoDTensor;
 
 template <typename T, int MajorType = Eigen::RowMajor,
           typename IndexType = Eigen::DenseIndex>
@@ -33,9 +34,9 @@ class TopkKernel : public framework::OpKernel<T> {
   void Compute(const framework::ExecutionContext& ctx) const override {
     // Get the top k elements of each row of input tensor
     // FIXME: only deal with matrix(2d tensor).
-    auto* input = ctx.Input<Tensor>("X");
-    auto* output = ctx.Output<Tensor>("Out");
-    auto* indices = ctx.Output<Tensor>("Indices");
+    auto* input = ctx.Input<LoDTensor>("X");
+    auto* output = ctx.Output<LoDTensor>("Out");
+    auto* indices = ctx.Output<LoDTensor>("Indices");
     // k is determined by Attr
     const size_t k = static_cast<int>(ctx.Attr<int>("k"));
......
@@ -100,7 +100,8 @@ class LayerHelper(object):
             if dtype is None:
                 dtype = each.dtype
             elif dtype != each.dtype:
-                raise ValueError("Data Type mismatch")
+                raise ValueError("Data Type mismatch: %d to %d" %
+                                 (dtype, each.dtype))
         return dtype
 
     def create_parameter(self,
......
@@ -769,7 +769,7 @@ def topk(input, k):
         array = fluid.layers.topk(x, k)
     """
     helper = LayerHelper('topk', **locals())
-    topk_out = helper.create_tmp_variable(dtype=input.data_type)
+    topk_out = helper.create_tmp_variable(dtype=input.dtype)
     topk_indices = helper.create_tmp_variable(dtype='int64')
     helper.append_op(
         type='top_k',
......
@@ -61,6 +61,7 @@ __all__ = [
     'transpose',
     'im2sequence',
     'nce',
+    'beam_search',
 ]
@@ -163,10 +164,8 @@ def fc(input,
         tmp = helper.create_tmp_variable(dtype)
         helper.append_op(
             type="mul",
-            inputs={
-                "X": input_var,
-                "Y": w,
-            },
+            inputs={"X": input_var,
+                    "Y": w},
             outputs={"Out": tmp},
            attrs={"x_num_col_dims": num_flatten_dims,
                   "y_num_col_dims": 1})
@@ -1551,6 +1550,38 @@ def sequence_expand(x, y, name=None):
     return tmp
 
 
+def beam_search(pre_ids, ids, scores, beam_size, end_id, level=0):
+    '''
+    This function implements the beam search algorithm.
+    '''
+    helper = LayerHelper('beam_search', **locals())
+    score_type = scores.dtype
+    id_type = ids.dtype
+
+    selected_scores = helper.create_tmp_variable(dtype=score_type)
+    selected_ids = helper.create_tmp_variable(dtype=id_type)
+
+    helper.append_op(
+        type='beam_search',
+        inputs={
+            'pre_ids': pre_ids,
+            'ids': ids,
+            'scores': scores,
+        },
+        outputs={
+            'selected_ids': selected_ids,
+            'selected_scores': selected_scores,
+        },
+        attrs={
+            # TODO(ChunweiYan) to assure other value support
+            'level': level,
+            'beam_size': beam_size,
+            'end_id': end_id,
+        })
+
+    return selected_ids, selected_scores
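For intuition, the selection rule a single `beam_search` step applies, sketched in plain Python. This is a simplified illustration of the op's intent, not its implementation (candidate grouping and end-id pruning live in the C++ `BeamSearch` class above, and the order of pruning versus selection may differ):

```python
import heapq

# For one source sentence: pool (id, score) candidates from all of its
# beams, skip beams whose previous id already hit end_id, and keep the
# beam_size highest-scoring candidates.
def beam_search_step(candidates, pre_ids, beam_size, end_id):
    pooled = []
    for beam, items in enumerate(candidates):
        if pre_ids[beam] == end_id:  # this beam has already ended
            continue
        pooled.extend(items)
    return heapq.nlargest(beam_size, pooled, key=lambda t: t[1])

print(beam_search_step([[(4, 0.9), (7, 0.3)], [(2, 0.6)]],
                       pre_ids=[1, 1], beam_size=2, end_id=10))
# [(4, 0.9), (2, 0.6)]
```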
 
 def lstm_unit(x_t,
               hidden_t_prev,
               cell_t_prev,
......
@@ -17,7 +17,7 @@ import paddle.v2 as paddle
 import paddle.v2.fluid as fluid
 import paddle.v2.fluid.core as core
 import paddle.v2.fluid.framework as framework
-import paddle.v2.fluid.layers as layers
+import paddle.v2.fluid.layers as pd
 from paddle.v2.fluid.executor import Executor
 
 dict_size = 30000
@@ -26,53 +26,136 @@ src_dict, trg_dict = paddle.dataset.wmt14.get_dict(dict_size)
 hidden_dim = 32
 word_dim = 16
 IS_SPARSE = True
-batch_size = 10
-max_length = 50
+batch_size = 2
+max_length = 8
 topk_size = 50
 trg_dic_size = 10000
+beam_size = 2
 
 decoder_size = hidden_dim
 
+place = core.CPUPlace()
+
 
-def encoder_decoder():
+def encoder():
     # encoder
-    src_word_id = layers.data(
+    src_word_id = pd.data(
         name="src_word_id", shape=[1], dtype='int64', lod_level=1)
-    src_embedding = layers.embedding(
+    src_embedding = pd.embedding(
         input=src_word_id,
         size=[dict_size, word_dim],
         dtype='float32',
         is_sparse=IS_SPARSE,
         param_attr=fluid.ParamAttr(name='vemb'))
 
-    fc1 = fluid.layers.fc(input=src_embedding, size=hidden_dim * 4, act='tanh')
-    lstm_hidden0, lstm_0 = layers.dynamic_lstm(input=fc1, size=hidden_dim * 4)
-    encoder_out = layers.sequence_last_step(input=lstm_hidden0)
+    fc1 = pd.fc(input=src_embedding, size=hidden_dim * 4, act='tanh')
+    lstm_hidden0, lstm_0 = pd.dynamic_lstm(input=fc1, size=hidden_dim * 4)
+    encoder_out = pd.sequence_last_step(input=lstm_hidden0)
+    return encoder_out
+
+
+def decoder_train(context):
     # decoder
-    trg_language_word = layers.data(
+    trg_language_word = pd.data(
         name="target_language_word", shape=[1], dtype='int64', lod_level=1)
-    trg_embedding = layers.embedding(
+    trg_embedding = pd.embedding(
         input=trg_language_word,
         size=[dict_size, word_dim],
         dtype='float32',
         is_sparse=IS_SPARSE,
         param_attr=fluid.ParamAttr(name='vemb'))
 
-    rnn = fluid.layers.DynamicRNN()
+    rnn = pd.DynamicRNN()
     with rnn.block():
         current_word = rnn.step_input(trg_embedding)
-        mem = rnn.memory(init=encoder_out)
-        fc1 = fluid.layers.fc(input=[current_word, mem],
-                              size=decoder_size,
-                              act='tanh')
-        out = fluid.layers.fc(input=fc1, size=target_dict_dim, act='softmax')
-        rnn.update_memory(mem, fc1)
-        rnn.output(out)
+        pre_state = rnn.memory(init=context)
+        current_state = pd.fc(input=[current_word, pre_state],
+                              size=decoder_size,
+                              act='tanh')
+
+        current_score = pd.fc(input=current_state,
+                              size=target_dict_dim,
+                              act='softmax')
+        rnn.update_memory(pre_state, current_state)
+        rnn.output(current_score)
 
     return rnn()
+
+
+def decoder_decode(context):
+    init_state = context
+    array_len = pd.fill_constant(shape=[1], dtype='int64', value=max_length)
+    counter = pd.zeros(shape=[1], dtype='int64')
+
+    # fill the first element with init_state
+    state_array = pd.create_array('float32')
+    pd.array_write(init_state, array=state_array, i=counter)
+
+    # ids, scores as memory
+    ids_array = pd.create_array('int64')
+    scores_array = pd.create_array('float32')
+
+    init_ids = pd.data(name="init_ids", shape=[1], dtype="int64", lod_level=2)
+    init_scores = pd.data(
+        name="init_scores", shape=[1], dtype="float32", lod_level=2)
+
+    pd.array_write(init_ids, array=ids_array, i=counter)
+    pd.array_write(init_scores, array=scores_array, i=counter)
+
+    cond = pd.less_than(x=counter, y=array_len)
+
+    while_op = pd.While(cond=cond)
+    with while_op.block():
+        pre_ids = pd.array_read(array=ids_array, i=counter)
+        pre_state = pd.array_read(array=state_array, i=counter)
+        pre_score = pd.array_read(array=scores_array, i=counter)
+
+        # expand the lod of pre_state to be the same with pre_score
+        pre_state_expanded = pd.sequence_expand(pre_state, pre_score)
+
+        pre_ids_emb = pd.embedding(
+            input=pre_ids,
+            size=[dict_size, word_dim],
+            dtype='float32',
+            is_sparse=IS_SPARSE)
+
+        # use rnn unit to update rnn
+        current_state = pd.fc(input=[pre_ids_emb, pre_state_expanded],
+                              size=decoder_size,
+                              act='tanh')
+
+        # use score to do beam search
+        current_score = pd.fc(input=current_state,
+                              size=target_dict_dim,
+                              act='softmax')
+        topk_scores, topk_indices = pd.topk(current_score, k=50)
+        selected_ids, selected_scores = pd.beam_search(
+            pre_ids, topk_indices, topk_scores, beam_size, end_id=10, level=0)
+
+        pd.increment(x=counter, value=1, in_place=True)
+
+        # update the memories
+        pd.array_write(current_state, array=state_array, i=counter)
+        pd.array_write(selected_ids, array=ids_array, i=counter)
+        pd.array_write(selected_scores, array=scores_array, i=counter)
+
+        pd.less_than(x=counter, y=array_len, cond=cond)
+
+    translation_ids, translation_scores = pd.beam_search_decode(
+        ids=ids_array, scores=scores_array)
+
+    # return init_ids, init_scores
+
+    return translation_ids, translation_scores
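For readers new to the while-op pattern, the control flow `decoder_decode` builds, reduced to plain Python: lists indexed by a counter stand in for the tensor arrays, and `step_fn` stands in for the embedding/fc/topk/beam_search body. This is a sketch of the pattern, not the fluid API:

```python
# Arrays indexed by a counter play the role of ids_array / state_array /
# scores_array; the loop runs for max_length steps.
def decode_loop(step_fn, init_state, init_ids, max_length):
    states, ids = [init_state], [init_ids]
    counter = 0
    while counter < max_length:
        new_state, selected_ids = step_fn(states[counter], ids[counter])
        states.append(new_state)
        ids.append(selected_ids)
        counter += 1
    return ids  # beam_search_decode then backtracks these per-step results
```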
+
+
+def set_init_lod(data, lod, place):
+    res = core.LoDTensor()
+    res.set(data, place)
+    res.set_lod(lod)
+    return res
+
 
 def to_lodtensor(data, place):
     seq_lens = [len(seq) for seq in data]
     cur_len = 0
@@ -88,12 +171,13 @@ def to_lodtensor(data, place):
     return res
 
 
-def main():
-    rnn_out = encoder_decoder()
-    label = layers.data(
+def train_main():
+    context = encoder()
+    rnn_out = decoder_train(context)
+    label = pd.data(
         name="target_language_next_word", shape=[1], dtype='int64', lod_level=1)
-    cost = layers.cross_entropy(input=rnn_out, label=label)
-    avg_cost = fluid.layers.mean(x=cost)
+    cost = pd.cross_entropy(input=rnn_out, label=label)
+    avg_cost = pd.mean(x=cost)
 
     optimizer = fluid.optimizer.Adagrad(learning_rate=1e-4)
     optimizer.minimize(avg_cost)
@@ -103,13 +187,12 @@ def main():
             paddle.dataset.wmt14.train(dict_size), buf_size=1000),
         batch_size=batch_size)
 
-    place = core.CPUPlace()
     exe = Executor(place)
 
     exe.run(framework.default_startup_program())
 
     batch_id = 0
-    for pass_id in xrange(2):
+    for pass_id in xrange(1):
         for data in train_data():
             word_data = to_lodtensor(map(lambda x: x[0], data), place)
             trg_word = to_lodtensor(map(lambda x: x[1], data), place)
@@ -125,9 +208,48 @@ def main():
             print('pass_id=' + str(pass_id) + ' batch=' + str(batch_id) +
                   " avg_cost=" + str(avg_cost_val))
             if batch_id > 3:
-                exit(0)
+                break
             batch_id += 1
+
+
+def decode_main():
+    context = encoder()
+    translation_ids, translation_scores = decoder_decode(context)
+
+    exe = Executor(place)
+    exe.run(framework.default_startup_program())
+
+    init_ids_data = np.array([1 for _ in range(batch_size)], dtype='int64')
+    init_scores_data = np.array(
+        [1. for _ in range(batch_size)], dtype='float32')
+    init_ids_data = init_ids_data.reshape((batch_size, 1))
+    init_scores_data = init_scores_data.reshape((batch_size, 1))
+    init_lod = [i for i in range(batch_size)] + [batch_size]
+    init_lod = [init_lod, init_lod]
+
+    train_data = paddle.batch(
+        paddle.reader.shuffle(
+            paddle.dataset.wmt14.train(dict_size), buf_size=1000),
+        batch_size=batch_size)
+    for _, data in enumerate(train_data()):
+        init_ids = set_init_lod(init_ids_data, init_lod, place)
+        init_scores = set_init_lod(init_scores_data, init_lod, place)
+
+        src_word_data = to_lodtensor(map(lambda x: x[0], data), place)
+
+        result_ids, result_scores = exe.run(
+            framework.default_main_program(),
+            feed={
+                'src_word_id': src_word_data,
+                'init_ids': init_ids,
+                'init_scores': init_scores
+            },
+            fetch_list=[translation_ids, translation_scores],
+            return_numpy=False)
+        print result_ids.lod()
+        break
 
 
 if __name__ == '__main__':
-    main()
+    # train_main()
+    decode_main()
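For reference, the initial LoD that `decode_main` constructs above. With `batch_size = 2`, each source sentence starts the search with exactly one entry (the start token) at both LoD levels:

```python
batch_size = 2
init_lod = [i for i in range(batch_size)] + [batch_size]
print([init_lod, init_lod])  # [[0, 1, 2], [0, 1, 2]]
```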