Skip to content

  • 体验新版
    • 正在加载...
  • 登录
  • PaddlePaddle
  • book
  • Issue
  • #746

B
book
  • 项目概览

PaddlePaddle / book

通知 17
Star 4
Fork 0
  • 代码
    • 文件
    • 提交
    • 分支
    • Tags
    • 贡献者
    • 分支图
    • Diff
  • Issue 40
    • 列表
    • 看板
    • 标记
    • 里程碑
  • 合并请求 37
  • Wiki 5
    • Wiki
  • 分析
    • 仓库
    • DevOps
  • 项目成员
  • Pages
B
book
  • 项目概览
    • 项目概览
    • 详情
    • 发布
  • 仓库
    • 仓库
    • 文件
    • 提交
    • 分支
    • 标签
    • 贡献者
    • 分支图
    • 比较
  • Issue 40
    • Issue 40
    • 列表
    • 看板
    • 标记
    • 里程碑
  • 合并请求 37
    • 合并请求 37
  • Pages
  • 分析
    • 分析
    • 仓库分析
    • DevOps
  • Wiki 5
    • Wiki
  • 成员
    • 成员
  • 收起侧边栏
  • 动态
  • 分支图
  • 创建新Issue
  • 提交
  • Issue看板
已关闭
开放中
Opened 6月 16, 2019 by saxon_zh@saxon_zhGuest

08.machine_translation infer 时报错

Created by: Alanyh

代码使用train.py中的代码,但是数据集是自己的,训练完成了,但是infer时报错: C++ Callstacks: DataType of Paddle Op sequence_expand Y must be the same. Get (float) != (int64_t) at [/paddle/paddle/fluid/framework/operator.cc:1115]

# ``print_function`` lives in ``__future__`` (the underscores were lost when
# the snippet was pasted); it must be the first statement of the module.
from __future__ import print_function

import os

import numpy as np
import paddle
import paddle.fluid as fluid
import six

# Vocabulary: source and target share a single dictionary of this size.
dict_size = 29364
source_dict_size = target_dict_size = dict_size

# Network sizes.
word_dim = 512
hidden_dim = 512
decoder_size = hidden_dim

# Decoding limits: maximum number of decode steps and beam width.
max_length = 256
beam_size = 4

batch_size = 64

# Passed as ``is_sparse`` to every embedding layer.
is_sparse = True
# Directory that train() saves parameters to and infer() loads them from.
model_save_dir = "1_machine_translation.inference.model"

def encoder():
    """Build the bidirectional GRU encoder over the source sentence.

    Declares the ``src_word_id`` input (int64, ``lod_level=1``), embeds it
    with the ``shared_w`` embedding table, and runs one forward and one
    reversed ``dynamic_gru`` over separate fc projections of the embedding.

    Returns:
        The forward and backward GRU outputs concatenated along axis 1.
    """
    src_word_id = fluid.layers.data(
        name="src_word_id", shape=[1], dtype='int64', lod_level=1)
    # 'shared_w' is also used by the decoder embeddings, so source and
    # target words share one embedding table.
    src_embedding = fluid.layers.embedding(
        input=src_word_id,
        size=[source_dict_size, word_dim],
        dtype='float32',
        is_sparse=is_sparse,
        param_attr='shared_w')

    # dynamic_gru is fed a projection of width 3 * hidden_dim.
    fc_forward = fluid.layers.fc(
        input=src_embedding, size=hidden_dim * 3, bias_attr=False)
    src_forward = fluid.layers.dynamic_gru(input=fc_forward, size=hidden_dim)
    fc_backward = fluid.layers.fc(
        input=src_embedding, size=hidden_dim * 3, bias_attr=False)
    src_backward = fluid.layers.dynamic_gru(
        input=fc_backward, size=hidden_dim, is_reverse=True)
    encoded_vector = fluid.layers.concat(
        input=[src_forward, src_backward], axis=1)
    return encoded_vector

def cell(x, hidden, encoder_out, encoder_out_proj):
    """Run one attention-GRU decoder step.

    Args:
        x: Embedding of the current (or previously generated) target word.
        hidden: Previous decoder hidden state.
        encoder_out: Encoder output sequence attended over.
        encoder_out_proj: fc projection of ``encoder_out`` computed once by
            the caller and reused at every step.

    Returns:
        A pair ``(out, out)``: the new hidden state, returned twice so callers
        can unpack it as ``(output, state)``.
    """

    def simple_attention(encoder_vec, encoder_proj, decoder_state):
        """Return the attention-weighted sum of ``encoder_vec``."""
        decoder_state_proj = fluid.layers.fc(
            input=decoder_state, size=decoder_size, bias_attr=False)
        # Repeat the decoder state so it lines up with every encoder step.
        decoder_state_expand = fluid.layers.sequence_expand(
            x=decoder_state_proj, y=encoder_proj)
        mixed_state = fluid.layers.elementwise_add(encoder_proj,
                                                   decoder_state_expand)
        attention_weights = fluid.layers.fc(
            input=mixed_state, size=1, bias_attr=False)
        # Normalise the scores within each source sequence.
        attention_weights = fluid.layers.sequence_softmax(
            input=attention_weights)
        weights_reshape = fluid.layers.reshape(
            x=attention_weights, shape=[-1])
        scaled = fluid.layers.elementwise_mul(
            x=encoder_vec, y=weights_reshape, axis=0)
        context = fluid.layers.sequence_pool(input=scaled, pool_type='sum')
        return context

    context = simple_attention(encoder_out, encoder_out_proj, hidden)
    # gru_unit is fed a projection of width 3 * decoder_size.
    out = fluid.layers.fc(
        input=[x, context], size=decoder_size * 3, bias_attr=False)
    # gru_unit returns several tensors; element 0 is used as the new hidden state.
    out = fluid.layers.gru_unit(
        input=out, hidden=hidden, size=decoder_size * 3)[0]
    return out, out

def train_decoder(encoder_out):
    """Build the teacher-forced decoder used for training.

    Declares the ``target_language_word`` input, embeds it with the
    ``shared_w`` table and unrolls ``cell`` over it inside a ``DynamicRNN``,
    using a tanh projection of the last encoder step as the initial state.

    Args:
        encoder_out: Output of ``encoder()``.

    Returns:
        The DynamicRNN output: the softmax over ``target_dict_size`` words
        produced at every target step.
    """
    encoder_last = fluid.layers.sequence_last_step(input=encoder_out)
    encoder_last_proj = fluid.layers.fc(
        input=encoder_last, size=decoder_size, act='tanh')
    # cache the encoder_out's computed result in attention
    encoder_out_proj = fluid.layers.fc(
        input=encoder_out, size=decoder_size, bias_attr=False)

    trg_language_word = fluid.layers.data(
        name="target_language_word", shape=[1], dtype='int64', lod_level=1)
    trg_embedding = fluid.layers.embedding(
        input=trg_language_word,
        size=[target_dict_size, word_dim],
        dtype='float32',
        is_sparse=is_sparse,
        param_attr='shared_w')

    rnn = fluid.layers.DynamicRNN()
    with rnn.block():
        x = rnn.step_input(trg_embedding)
        pre_state = rnn.memory(init=encoder_last_proj, need_reorder=True)
        # The encoder tensors are step-invariant inputs of the RNN block.
        encoder_out = rnn.static_input(encoder_out)
        encoder_out_proj = rnn.static_input(encoder_out_proj)
        out, current_state = cell(x, pre_state, encoder_out, encoder_out_proj)
        prob = fluid.layers.fc(input=out, size=target_dict_size, act='softmax')

        rnn.update_memory(pre_state, current_state)
        rnn.output(prob)

    return rnn()

def train_model():
    """Assemble the training network and return its mean loss.

    Chains ``encoder()`` and ``train_decoder()``, declares the
    ``target_language_next_word`` label input and applies cross entropy
    between the decoder output and that label.

    Returns:
        The mean of the per-step cross-entropy cost.
    """
    encoder_out = encoder()
    rnn_out = train_decoder(encoder_out)
    label = fluid.layers.data(
        name="target_language_next_word",
        shape=[1],
        dtype='int64',
        lod_level=1)
    cost = fluid.layers.cross_entropy(input=rnn_out, label=label)
    avg_cost = fluid.layers.mean(cost)
    return avg_cost

def optimizer_func():
    """Configure gradient clipping and return the Adam optimizer.

    Side effect: registers global-norm gradient clipping (``clip_norm=5.0``)
    through ``fluid.clip.set_gradient_clip``.

    Returns:
        An Adam optimizer with a ``noam_decay(hidden_dim, 1000)`` learning
        rate schedule and L2 regularization (coefficient ``1e-4``).
    """
    fluid.clip.set_gradient_clip(
        clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=5.0))
    lr_decay = fluid.layers.learning_rate_scheduler.noam_decay(
        hidden_dim, 1000)
    return fluid.optimizer.Adam(
        learning_rate=lr_decay,
        regularization=fluid.regularizer.L2DecayRegularizer(
            regularization_coeff=1e-4))

def train(use_cuda):
    """Train the model and save its parameters after every pass.

    Args:
        use_cuda: Run on ``CUDAPlace(0)`` when true, otherwise on CPU.

    Side effects:
        Prints the loss of every batch and writes the parameters to
        ``model_save_dir`` at the end of each pass.
    """
    train_prog = fluid.Program()
    startup_prog = fluid.Program()
    with fluid.program_guard(train_prog, startup_prog):
        with fluid.unique_name.guard():
            avg_cost = train_model()
            optimizer = optimizer_func()
            optimizer.minimize(avg_cost)

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    # Optional warm start from previously saved parameters:
    # fluid.io.load_params(exe, model_save_dir, main_program=train_prog)

    # NOTE(review): ``train_reader1`` is not defined in this listing;
    # presumably it is the user's own dataset reader -- confirm.
    train_data = paddle.batch(
        paddle.reader.shuffle(train_reader1, buf_size=100000),
        batch_size=batch_size)

    feeder = fluid.DataFeeder(
        feed_list=[
            'src_word_id', 'target_language_word', 'target_language_next_word'
        ],
        place=place,
        program=train_prog)

    exe.run(startup_prog)

    EPOCH_NUM = 20
    for pass_id in six.moves.xrange(EPOCH_NUM):
        for batch_id, data in enumerate(train_data()):
            cost = exe.run(
                train_prog, feed=feeder.feed(data), fetch_list=[avg_cost])[0]
            print('pass_id: %d, batch_id: %d, loss: %f' % (pass_id, batch_id,
                                                           cost))
        # Checkpoint once per pass so a later infer() can load the result.
        fluid.io.save_params(exe, model_save_dir, main_program=train_prog)

def infer_decoder(encoder_out):
    """Build the beam-search decoder used for inference.

    Declares the ``init_ids`` / ``init_scores`` inputs (``lod_level=2``) and
    runs a ``While`` loop of at most ``max_length`` steps. Each step embeds
    the previously selected ids, runs ``cell``, takes the top ``beam_size``
    words and lets ``beam_search`` choose the surviving candidates.

    Args:
        encoder_out: Output of ``encoder()``.

    Returns:
        ``(translation_ids, translation_scores)`` from ``beam_search_decode``.
    """
    # NOTE(review): layers are created in the same order as in
    # train_decoder(); presumably this keeps the auto-generated parameter
    # names aligned with the saved parameters -- keep both in sync.
    encoder_last = fluid.layers.sequence_last_step(input=encoder_out)
    encoder_last_proj = fluid.layers.fc(
        input=encoder_last, size=decoder_size, act='tanh')
    encoder_out_proj = fluid.layers.fc(
        input=encoder_out, size=decoder_size, bias_attr=False)

    max_len = fluid.layers.fill_constant(
        shape=[1], dtype='int64', value=max_length)
    counter = fluid.layers.zeros(shape=[1], dtype='int64', force_cpu=True)

    init_ids = fluid.layers.data(
        name="init_ids", shape=[1], dtype="int64", lod_level=2)
    init_scores = fluid.layers.data(
        name="init_scores", shape=[1], dtype="float32", lod_level=2)
    # create and init arrays to save selected ids, scores and states for each step
    ids_array = fluid.layers.array_write(init_ids, i=counter)
    scores_array = fluid.layers.array_write(init_scores, i=counter)
    state_array = fluid.layers.array_write(encoder_last_proj, i=counter)

    cond = fluid.layers.less_than(x=counter, y=max_len)
    while_op = fluid.layers.While(cond=cond)
    with while_op.block():
        pre_ids = fluid.layers.array_read(array=ids_array, i=counter)
        pre_score = fluid.layers.array_read(array=scores_array, i=counter)
        pre_state = fluid.layers.array_read(array=state_array, i=counter)

        pre_ids_emb = fluid.layers.embedding(
            input=pre_ids,
            size=[target_dict_size, word_dim],
            dtype='float32',
            is_sparse=is_sparse,
            param_attr='shared_w')
        out, current_state = cell(pre_ids_emb, pre_state, encoder_out,
                                  encoder_out_proj)
        prob = fluid.layers.fc(
            input=current_state, size=target_dict_size, act='softmax')

        # beam search
        topk_scores, topk_indices = fluid.layers.topk(prob, k=beam_size)
        # Accumulate log-probabilities on top of each prefix's score.
        accu_scores = fluid.layers.elementwise_add(
            x=fluid.layers.log(topk_scores),
            y=fluid.layers.reshape(pre_score, shape=[-1]),
            axis=0)
        accu_scores = fluid.layers.lod_reset(x=accu_scores, y=pre_ids)
        selected_ids, selected_scores = fluid.layers.beam_search(
            pre_ids, pre_score, topk_indices, accu_scores, beam_size, end_id=1)

        fluid.layers.increment(x=counter, value=1, in_place=True)
        # save selected ids and corresponding scores of each step
        fluid.layers.array_write(selected_ids, array=ids_array, i=counter)
        fluid.layers.array_write(selected_scores, array=scores_array, i=counter)
        # update rnn state by sequence_expand acting as gather
        #
        # sequence_expand requires x and y to have the same dtype. The state
        # and encoder tensors are float32, so expanding by the int64
        # ``selected_ids`` fails with "DataType of Paddle Op sequence_expand
        # ... must be the same. Get (float) != (int64_t)". ``selected_scores``
        # is float32 and, per the beam_search docs, has the same LoD as
        # ``selected_ids``, so the expansion pattern is unchanged.
        current_state = fluid.layers.sequence_expand(current_state,
                                                     selected_scores)
        fluid.layers.array_write(current_state, array=state_array, i=counter)
        current_enc_out = fluid.layers.sequence_expand(encoder_out,
                                                       selected_scores)
        fluid.layers.assign(current_enc_out, encoder_out)
        current_enc_out_proj = fluid.layers.sequence_expand(encoder_out_proj,
                                                            selected_scores)
        fluid.layers.assign(current_enc_out_proj, encoder_out_proj)

        # update conditional variable
        # Continue while under the length limit and candidates remain.
        length_cond = fluid.layers.less_than(x=counter, y=max_len)
        finish_cond = fluid.layers.logical_not(
            fluid.layers.is_empty(x=selected_ids))
        fluid.layers.logical_and(x=length_cond, y=finish_cond, out=cond)

    translation_ids, translation_scores = fluid.layers.beam_search_decode(
        ids=ids_array, scores=scores_array, beam_size=beam_size, end_id=1)

    return translation_ids, translation_scores

def infer_model():
    """Assemble the inference network.

    Returns:
        ``(translation_ids, translation_scores)`` as produced by
        ``infer_decoder`` on top of ``encoder()``.
    """
    encoder_out = encoder()
    translation_ids, translation_scores = infer_decoder(encoder_out)
    return translation_ids, translation_scores

def infer(use_cuda):
    """Translate the test set with beam search and print the hypotheses.

    Builds the inference program, loads the parameters saved in
    ``model_save_dir`` and, for every test batch, prints each source sentence
    followed by the score and text of each of its candidate translations.

    Args:
        use_cuda: Run on ``CUDAPlace(0)`` when true, otherwise on CPU.
    """
    infer_prog = fluid.Program()
    startup_prog = fluid.Program()
    with fluid.program_guard(infer_prog, startup_prog):
        with fluid.unique_name.guard():
            translation_ids, translation_scores = infer_model()

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)

    # NOTE(review): ``test_reader`` and ``reverse_vocab`` are not defined in
    # this listing; presumably the user's own reader and id-to-word table --
    # confirm.
    test_data = paddle.batch(test_reader, batch_size=batch_size)
    src_idx2word = reverse_vocab
    trg_idx2word = reverse_vocab

    fluid.io.load_params(exe, model_save_dir, main_program=infer_prog)

    for data in test_data():
        src_word_id = fluid.create_lod_tensor(
            data=[x[0] for x in data],
            recursive_seq_lens=[[len(x[0]) for x in data]],
            place=place)
        # One start token (id 0) with score 0 per source sentence, wrapped in
        # a two-level LoD as the beam-search inputs are declared.
        init_ids = fluid.create_lod_tensor(
            data=np.array([[0]] * len(data), dtype='int64'),
            recursive_seq_lens=[[1] * len(data)] * 2,
            place=place)
        init_scores = fluid.create_lod_tensor(
            data=np.array([[0.]] * len(data), dtype='float32'),
            recursive_seq_lens=[[1] * len(data)] * 2,
            place=place)
        seq_ids, seq_scores = exe.run(
            infer_prog,
            feed={
                'src_word_id': src_word_id,
                'init_ids': init_ids,
                'init_scores': init_scores
            },
            fetch_list=[translation_ids, translation_scores],
            return_numpy=False)
        # How to parse the results:
        #   Suppose the lod of seq_ids is:
        #     [[0, 3, 6], [0, 12, 24, 40, 54, 67, 82]]
        #   then from lod[0]:
        #     there are 2 source sentences, beam width is 3.
        #   from lod[1]:
        #     the first source sentence has 3 hyps; the lengths are 12, 12, 16
        #     the second source sentence has 3 hyps; the lengths are 14, 13, 15
        ids_lod = seq_ids.lod()
        # Convert once per batch instead of once per candidate.
        ids_np = np.array(seq_ids)
        scores_np = np.array(seq_scores)

        hyps = [[] for _ in range(len(ids_lod[0]) - 1)]
        scores = [[] for _ in range(len(seq_scores.lod()[0]) - 1)]
        for i in range(len(ids_lod[0]) - 1):  # for each source sentence
            start = ids_lod[0][i]
            end = ids_lod[0][i + 1]
            print("Original sentence:")
            # [1:-1] drops the first and last token of the source sentence.
            print(" ".join([src_idx2word[idx] for idx in data[i][0][1:-1]]))
            print("Translated score and sentence:")
            for j in range(end - start):  # for each candidate
                sub_start = ids_lod[1][start + j]
                sub_end = ids_lod[1][start + j + 1]
                hyps[i].append(" ".join([
                    trg_idx2word[idx]
                    for idx in ids_np[sub_start:sub_end][1:-1]
                ]))
                # The score stored at the last position of the candidate is
                # the one reported for it.
                scores[i].append(scores_np[sub_end - 1])
                print(scores[i][-1], hyps[i][-1].encode('utf8'))

def main(use_cuda):
    """Run training on the selected device.

    Inference is left disabled, as in the original listing; enable the call
    below once trained parameters have been saved.

    Args:
        use_cuda: Forwarded to ``train``.
    """
    train(use_cuda)
    # infer(use_cuda)

# Script entry point. The dunder underscores were lost when the snippet was
# pasted; without them the guard compares an undefined ``name``.
if __name__ == '__main__':
    use_cuda = False  # set to True if training with GPU
    main(use_cuda)
    # infer(use_cuda)

指派人
分配到
无
里程碑
无
分配里程碑
工时统计
无
截止日期
无
标识: paddlepaddle/book#746
渝ICP备2023009037号

京公网安备11010502055752号

网络110报警服务 Powered by GitLab CE v13.7
开源知识
Git 入门 Pro Git 电子书 在线学 Git
Markdown 基础入门 IT 技术知识开源图谱
帮助
使用手册 反馈建议 博客
《GitCode 隐私声明》 《GitCode 服务条款》 关于GitCode
Powered by GitLab CE v13.7