From 16a3a9219ac0f6d51d2733cf4f3990734a9152b1 Mon Sep 17 00:00:00 2001 From: guosheng Date: Wed, 16 Sep 2020 16:44:03 +0800 Subject: [PATCH] Update Transformer, seq2seq, sequence_tagging to adapt to 2.0-beta hapi apis. --- bert/readme.md | 2 + bert_leveldb/readme.md | 2 + sentiment_classification/README.md | 3 +- seq2seq/predict.py | 31 +++++++------- seq2seq/seq2seq_attn.py | 7 +--- seq2seq/seq2seq_base.py | 12 ++---- seq2seq/train.py | 23 +++++------ seq2seq/utility.py | 11 +++-- sequence_tagging/eval.py | 17 ++++---- sequence_tagging/predict.py | 12 ++++-- sequence_tagging/sequence_tagging.py | 17 ++++---- sequence_tagging/train.py | 19 +++++---- transformer/predict.py | 52 ++++++++++++------------ transformer/train.py | 60 ++++++++++++++-------------- transformer/transformer.py | 11 ++--- 15 files changed, 138 insertions(+), 141 deletions(-) diff --git a/bert/readme.md b/bert/readme.md index e367d88..3e7727c 100644 --- a/bert/readme.md +++ b/bert/readme.md @@ -1,3 +1,5 @@ +**仍在开发中,待完成** + 1. download data: wget https://paddle-hapi.bj.bcebos.com/data/bert_data.tar.gz 2. unzip data: tar -zvxf bert_data.tar.gz diff --git a/bert_leveldb/readme.md b/bert_leveldb/readme.md index fbcb7e7..714e664 100644 --- a/bert_leveldb/readme.md +++ b/bert_leveldb/readme.md @@ -1,3 +1,5 @@ +**仍在开发中,待完成** + 0. python3.7 -m pip install leveldb 1. download data: wget https://paddle-hapi.bj.bcebos.com/data/bert_data.tar.gz diff --git a/sentiment_classification/README.md b/sentiment_classification/README.md index 9de6eb7..a13c9f6 100644 --- a/sentiment_classification/README.md +++ b/sentiment_classification/README.md @@ -1,5 +1,6 @@ -## 简介 +**仍在开发中,待完成** +## 简介 情感是人类的一种高级智能行为,为了识别文本的情感倾向,需要深入的语义建模。另外,不同领域(如餐饮、体育)在情感的表达各不相同,因而需要有大规模覆盖各个领域的数据进行模型训练。为此,我们通过基于深度学习的语义模型和大规模数据挖掘解决上述两个问题。效果上,我们基于开源情感倾向分类数据集ChnSentiCorp进行评测。具体数据如下所示: diff --git a/seq2seq/predict.py b/seq2seq/predict.py index db8aef1..39ffd65 100644 --- a/seq2seq/predict.py +++ b/seq2seq/predict.py @@ -19,11 +19,12 @@ import random from functools import partial import numpy as np +import paddle import paddle.fluid as fluid from paddle.fluid.layers.utils import flatten from paddle.fluid.io import DataLoader +from paddle.static import InputSpec as Input -from paddle.incubate.hapi.model import Input, set_device from args import parse_args from seq2seq_base import BaseInferModel from seq2seq_attn import AttentionInferModel @@ -48,7 +49,7 @@ def post_process_seq(seq, bos_idx, eos_idx, output_bos=False, def do_predict(args): - device = set_device("gpu" if args.use_gpu else "cpu") + device = paddle.set_device("gpu" if args.use_gpu else "cpu") fluid.enable_dygraph(device) if args.eager_run else None # define model @@ -84,19 +85,21 @@ def do_predict(args): return_list=True) model_maker = AttentionInferModel if args.attention else BaseInferModel - model = model_maker( - args.src_vocab_size, - args.tar_vocab_size, - args.hidden_size, - args.hidden_size, - args.num_layers, - args.dropout, - bos_id=bos_id, - eos_id=eos_id, - beam_size=args.beam_size, - max_out_len=256) + model = paddle.Model( + model_maker( + args.src_vocab_size, + args.tar_vocab_size, + args.hidden_size, + args.hidden_size, + args.num_layers, + args.dropout, + bos_id=bos_id, + eos_id=eos_id, + beam_size=args.beam_size, + max_out_len=256), + inputs=inputs) - model.prepare(inputs=inputs, device=device) + model.prepare() # load the trained model assert args.reload_model, ( diff --git a/seq2seq/seq2seq_attn.py b/seq2seq/seq2seq_attn.py index 0979d68..472efcd 100644 --- a/seq2seq/seq2seq_attn.py +++ b/seq2seq/seq2seq_attn.py @@ -18,10 +18,7 @@ from paddle.fluid import ParamAttr from paddle.fluid.initializer import UniformInitializer from paddle.fluid.dygraph import Embedding, Linear, Layer from paddle.fluid.layers import BeamSearchDecoder - -from paddle.incubate.hapi.model import Model -from paddle.incubate.hapi.loss import Loss -from paddle.incubate.hapi.text import DynamicDecode, RNN, BasicLSTMCell, RNNCell +from paddle.text import DynamicDecode, RNN, BasicLSTMCell, RNNCell from seq2seq_base import Encoder @@ -138,7 +135,7 @@ class Decoder(Layer): return predict -class AttentionModel(Model): +class AttentionModel(Layer): def __init__(self, src_vocab_size, trg_vocab_size, diff --git a/seq2seq/seq2seq_base.py b/seq2seq/seq2seq_base.py index 8d8b5b6..07a0018 100644 --- a/seq2seq/seq2seq_base.py +++ b/seq2seq/seq2seq_base.py @@ -18,18 +18,14 @@ from paddle.fluid import ParamAttr from paddle.fluid.initializer import UniformInitializer from paddle.fluid.dygraph import Embedding, Linear, Layer from paddle.fluid.layers import BeamSearchDecoder +from paddle.text import DynamicDecode, RNN, BasicLSTMCell, RNNCell -from paddle.incubate.hapi.model import Model -from paddle.incubate.hapi.loss import Loss -from paddle.incubate.hapi.text import DynamicDecode, RNN, BasicLSTMCell, RNNCell - -class CrossEntropyCriterion(Loss): +class CrossEntropyCriterion(Layer): def __init__(self): super(CrossEntropyCriterion, self).__init__() - def forward(self, outputs, labels): - predict, (trg_length, label) = outputs[0], labels + def forward(self, predict, trg_length, label): # for target padding mask mask = layers.sequence_mask( trg_length, maxlen=layers.shape(predict)[1], dtype=predict.dtype) @@ -140,7 +136,7 @@ class Decoder(Layer): return predict -class BaseModel(Model): +class BaseModel(Layer): def __init__(self, src_vocab_size, trg_vocab_size, diff --git a/seq2seq/train.py b/seq2seq/train.py index b065ff5..104c2f7 100644 --- a/seq2seq/train.py +++ b/seq2seq/train.py @@ -15,14 +15,15 @@ import logging import os import random +from args import parse_args from functools import partial import numpy as np +import paddle import paddle.fluid as fluid from paddle.fluid.io import DataLoader +from paddle.static import InputSpec as Input -from paddle.incubate.hapi.model import Input, set_device -from args import parse_args from seq2seq_base import BaseModel, CrossEntropyCriterion from seq2seq_attn import AttentionModel from reader import create_data_loader @@ -30,7 +31,7 @@ from utility import PPL, TrainCallback, get_model_cls def do_train(args): - device = set_device("gpu" if args.use_gpu else "cpu") + device = paddle.set_device("gpu" if args.use_gpu else "cpu") fluid.enable_dygraph(device) if args.eager_run else None if args.enable_ce: @@ -58,9 +59,11 @@ def do_train(args): model_maker = get_model_cls( AttentionModel) if args.attention else get_model_cls(BaseModel) - model = model_maker(args.src_vocab_size, args.tar_vocab_size, - args.hidden_size, args.hidden_size, args.num_layers, - args.dropout) + model = paddle.Model( + model_maker(args.src_vocab_size, args.tar_vocab_size, args.hidden_size, + args.hidden_size, args.num_layers, args.dropout), + inputs=inputs, + labels=labels) grad_clip = fluid.clip.GradientClipByGlobalNorm( clip_norm=args.max_grad_norm) optimizer = fluid.optimizer.Adam( @@ -69,13 +72,7 @@ def do_train(args): grad_clip=grad_clip) ppl_metric = PPL(reset_freq=100) # ppl for every 100 batches - model.prepare( - optimizer, - CrossEntropyCriterion(), - ppl_metric, - inputs=inputs, - labels=labels, - device=device) + model.prepare(optimizer, CrossEntropyCriterion(), ppl_metric) model.fit(train_data=train_loader, eval_data=eval_loader, epochs=args.max_epoch, diff --git a/seq2seq/utility.py b/seq2seq/utility.py index cb2114c..fc446ef 100644 --- a/seq2seq/utility.py +++ b/seq2seq/utility.py @@ -15,14 +15,13 @@ import math import functools +import paddle import paddle.fluid as fluid +from paddle.metric import Metric +from paddle.text import BasicLSTMCell -from paddle.incubate.hapi.metrics import Metric -from paddle.incubate.hapi.callbacks import ProgBarLogger -from paddle.incubate.hapi.text import BasicLSTMCell - -class TrainCallback(ProgBarLogger): +class TrainCallback(paddle.callbacks.ProgBarLogger): def __init__(self, ppl, log_freq, verbose=2): super(TrainCallback, self).__init__(log_freq, verbose) self.ppl = ppl @@ -58,7 +57,7 @@ class PPL(Metric): self.reset_freq = reset_freq self.reset() - def add_metric_op(self, pred, seq_length, label): + def compute(self, pred, seq_length, label): word_num = fluid.layers.reduce_sum(seq_length) return word_num diff --git a/sequence_tagging/eval.py b/sequence_tagging/eval.py index 4163ce7..a192a23 100644 --- a/sequence_tagging/eval.py +++ b/sequence_tagging/eval.py @@ -18,9 +18,10 @@ SequenceTagging eval structure from __future__ import division from __future__ import print_function +import paddle import paddle.fluid as fluid from paddle.fluid.layers.utils import flatten -from paddle.incubate.hapi.model import Input, set_device +from paddle.static import InputSpec as Input from sequence_tagging import SeqTagging, LacLoss, ChunkEval from reader import LacDataset, LacDataLoader @@ -29,7 +30,7 @@ from utils.configure import PDConfig def main(args): - place = set_device(args.device) + place = paddle.set_device(args.device) fluid.enable_dygraph(place) if args.dynamic else None inputs = [ @@ -45,14 +46,14 @@ def main(args): vocab_size = dataset.vocab_size num_labels = dataset.num_labels - model = SeqTagging(args, vocab_size, num_labels, mode="test") + model = paddle.Model( + SeqTagging( + args, vocab_size, num_labels, mode="test"), + inputs=inputs, + labels=labels) model.mode = "test" - model.prepare( - metrics=ChunkEval(num_labels), - inputs=inputs, - labels=labels, - device=place) + model.prepare(metrics=ChunkEval(num_labels)) model.load(args.init_from_checkpoint, skip_mismatch=True) eval_result = model.evaluate( diff --git a/sequence_tagging/predict.py b/sequence_tagging/predict.py index 583b41c..4c55ff7 100644 --- a/sequence_tagging/predict.py +++ b/sequence_tagging/predict.py @@ -20,9 +20,10 @@ from __future__ import print_function import six +import paddle import paddle.fluid as fluid from paddle.fluid.layers.utils import flatten -from paddle.incubate.hapi.model import Input, set_device +from paddle.static import InputSpec as Input from sequence_tagging import SeqTagging, LacLoss, ChunkEval from reader import LacDataset, LacDataLoader @@ -31,7 +32,7 @@ from utils.configure import PDConfig def main(args): - place = set_device(args.device) + place = paddle.set_device(args.device) fluid.enable_dygraph(place) if args.dynamic else None inputs = [ @@ -46,10 +47,13 @@ def main(args): vocab_size = dataset.vocab_size num_labels = dataset.num_labels - model = SeqTagging(args, vocab_size, num_labels, mode="predict") + model = paddle.Model( + SeqTagging( + args, vocab_size, num_labels, mode="predict"), + inputs=inputs) model.mode = "test" - model.prepare(inputs=inputs) + model.prepare() model.load(args.init_from_checkpoint, skip_mismatch=True) diff --git a/sequence_tagging/sequence_tagging.py b/sequence_tagging/sequence_tagging.py index 3392a61..9651162 100644 --- a/sequence_tagging/sequence_tagging.py +++ b/sequence_tagging/sequence_tagging.py @@ -25,17 +25,16 @@ import math import argparse import numpy as np +import paddle import paddle.fluid as fluid -from paddle.incubate.hapi.metrics import Metric -from paddle.incubate.hapi.model import Model -from paddle.incubate.hapi.loss import Loss -from paddle.incubate.hapi.text import SequenceTagging +from paddle.metric import Metric +from paddle.text import SequenceTagging from utils.check import check_gpu, check_version from utils.configure import PDConfig -class SeqTagging(Model): +class SeqTagging(fluid.dygraph.Layer): def __init__(self, args, vocab_size, num_labels, length=None, mode="train"): super(SeqTagging, self).__init__() @@ -131,13 +130,13 @@ class Chunk_eval(fluid.dygraph.Layer): return (num_infer_chunks, num_label_chunks, num_correct_chunks) -class LacLoss(Loss): +class LacLoss(fluid.dygraph.Layer): def __init__(self): super(LacLoss, self).__init__() pass - def forward(self, outputs, labels): - avg_cost = outputs[1] + def forward(self, *args): + avg_cost = args[1] return avg_cost @@ -149,7 +148,7 @@ class ChunkEval(Metric): int(math.ceil((num_labels - 1) / 2.0)), "IOB") self.reset() - def add_metric_op(self, *args): + def compute(self, *args): crf_decode = args[0] lengths = args[2] label = args[3] diff --git a/sequence_tagging/train.py b/sequence_tagging/train.py index 5626838..670a466 100644 --- a/sequence_tagging/train.py +++ b/sequence_tagging/train.py @@ -18,9 +18,10 @@ SequenceTagging network structure from __future__ import division from __future__ import print_function +import paddle import paddle.fluid as fluid from paddle.fluid.optimizer import AdamOptimizer -from paddle.incubate.hapi.model import Input, set_device +from paddle.static import InputSpec as Input from sequence_tagging import SeqTagging, LacLoss, ChunkEval from reader import LacDataset, LacDataLoader @@ -29,7 +30,7 @@ from utils.configure import PDConfig def main(args): - place = set_device(args.device) + place = paddle.set_device(args.device) fluid.enable_dygraph(place) if args.dynamic else None inputs = [ @@ -48,19 +49,17 @@ def main(args): vocab_size = dataset.vocab_size num_labels = dataset.num_labels - model = SeqTagging(args, vocab_size, num_labels, mode="train") + model = paddle.Model( + SeqTagging( + args, vocab_size, num_labels, mode="train"), + inputs=inputs, + labels=labels) optim = AdamOptimizer( learning_rate=args.base_learning_rate, parameter_list=model.parameters()) - model.prepare( - optim, - LacLoss(), - ChunkEval(num_labels), - inputs=inputs, - labels=labels, - device=args.device) + model.prepare(optim, LacLoss(), ChunkEval(num_labels)) if args.init_from_checkpoint: model.load(args.init_from_checkpoint) diff --git a/transformer/predict.py b/transformer/predict.py index 5521d6c..87e40d7 100644 --- a/transformer/predict.py +++ b/transformer/predict.py @@ -21,11 +21,11 @@ import paddle import paddle.fluid as fluid from paddle.io import DataLoader from paddle.fluid.layers.utils import flatten +from paddle.static import InputSpec as Input from utils.configure import PDConfig from utils.check import check_gpu, check_version -from paddle.incubate.hapi.model import Input, set_device from reader import prepare_infer_input, Seq2SeqDataset, Seq2SeqBatchSampler from transformer import InferTransformer @@ -48,7 +48,7 @@ def post_process_seq(seq, bos_idx, eos_idx, output_bos=False, def do_predict(args): - device = set_device("gpu" if args.use_cuda else "cpu") + device = paddle.set_device("gpu" if args.use_cuda else "cpu") fluid.enable_dygraph(device) if args.eager_run else None inputs = [ @@ -99,37 +99,39 @@ def do_predict(args): return_list=True) # define model - transformer = InferTransformer( - args.src_vocab_size, - args.trg_vocab_size, - args.max_length + 1, - args.n_layer, - args.n_head, - args.d_key, - args.d_value, - args.d_model, - args.d_inner_hid, - args.prepostprocess_dropout, - args.attention_dropout, - args.relu_dropout, - args.preprocess_cmd, - args.postprocess_cmd, - args.weight_sharing, - args.bos_idx, - args.eos_idx, - beam_size=args.beam_size, - max_out_len=args.max_out_len) - transformer.prepare(inputs=inputs, device=device) + model = paddle.Model( + InferTransformer( + args.src_vocab_size, + args.trg_vocab_size, + args.max_length + 1, + args.n_layer, + args.n_head, + args.d_key, + args.d_value, + args.d_model, + args.d_inner_hid, + args.prepostprocess_dropout, + args.attention_dropout, + args.relu_dropout, + args.preprocess_cmd, + args.postprocess_cmd, + args.weight_sharing, + args.bos_idx, + args.eos_idx, + beam_size=args.beam_size, + max_out_len=args.max_out_len), + inputs) + model.prepare() # load the trained model assert args.init_from_params, ( "Please set init_from_params to load the infer model.") - transformer.load(args.init_from_params) + model.load(args.init_from_params) # TODO: use model.predict when support variant length f = open(args.output_file, "wb") for data in data_loader(): - finished_seq = transformer.test_batch(inputs=flatten(data))[0] + finished_seq = model.test_batch(inputs=flatten(data))[0] finished_seq = np.transpose(finished_seq, [0, 2, 1]) for ins in finished_seq: for beam_idx, beam in enumerate(ins): diff --git a/transformer/train.py b/transformer/train.py index 90f54d0..23aeece 100644 --- a/transformer/train.py +++ b/transformer/train.py @@ -19,17 +19,16 @@ import numpy as np import paddle import paddle.fluid as fluid from paddle.io import DataLoader +from paddle.static import InputSpec as Input from utils.configure import PDConfig from utils.check import check_gpu, check_version -from paddle.incubate.hapi.model import Input, set_device -from paddle.incubate.hapi.callbacks import ProgBarLogger from reader import create_data_loader from transformer import Transformer, CrossEntropyCriterion -class TrainCallback(ProgBarLogger): +class TrainCallback(paddle.callbacks.ProgBarLogger): def __init__(self, args, verbose=2, @@ -75,7 +74,7 @@ class TrainCallback(ProgBarLogger): def do_train(args): - device = set_device("gpu" if args.use_cuda else "cpu") + device = paddle.set_device("gpu" if args.use_cuda else "cpu") fluid.enable_dygraph(device) if args.eager_run else None # set seed for CE @@ -119,14 +118,16 @@ def do_train(args): eval_loader, eval_steps_fn) = create_data_loader(args, device) # define model - transformer = Transformer( - args.src_vocab_size, args.trg_vocab_size, args.max_length + 1, - args.n_layer, args.n_head, args.d_key, args.d_value, args.d_model, - args.d_inner_hid, args.prepostprocess_dropout, args.attention_dropout, - args.relu_dropout, args.preprocess_cmd, args.postprocess_cmd, - args.weight_sharing, args.bos_idx, args.eos_idx) - - transformer.prepare( + model = paddle.Model( + Transformer(args.src_vocab_size, args.trg_vocab_size, + args.max_length + 1, args.n_layer, args.n_head, args.d_key, + args.d_value, args.d_model, args.d_inner_hid, + args.prepostprocess_dropout, args.attention_dropout, + args.relu_dropout, args.preprocess_cmd, + args.postprocess_cmd, args.weight_sharing, args.bos_idx, + args.eos_idx), inputs, labels) + + model.prepare( fluid.optimizer.Adam( learning_rate=fluid.layers.noam_decay( args.d_model, @@ -135,32 +136,29 @@ def do_train(args): beta1=args.beta1, beta2=args.beta2, epsilon=float(args.eps), - parameter_list=transformer.parameters()), - CrossEntropyCriterion(args.label_smooth_eps), - inputs=inputs, - labels=labels, - device=device) + parameter_list=model.parameters()), + CrossEntropyCriterion(args.label_smooth_eps)) ## init from some checkpoint, to resume the previous training if args.init_from_checkpoint: - transformer.load(args.init_from_checkpoint) + model.load(args.init_from_checkpoint) ## init from some pretrain models, to better solve the current task if args.init_from_pretrain_model: - transformer.load(args.init_from_pretrain_model, reset_optimizer=True) + model.load(args.init_from_pretrain_model, reset_optimizer=True) # model train - transformer.fit(train_data=train_loader, - eval_data=eval_loader, - epochs=args.epoch, - eval_freq=1, - save_freq=1, - save_dir=args.save_model, - callbacks=[ - TrainCallback( - args, - train_steps_fn=train_steps_fn, - eval_steps_fn=eval_steps_fn) - ]) + model.fit(train_data=train_loader, + eval_data=eval_loader, + epochs=args.epoch, + eval_freq=1, + save_freq=1, + save_dir=args.save_model, + callbacks=[ + TrainCallback( + args, + train_steps_fn=train_steps_fn, + eval_steps_fn=eval_steps_fn) + ]) if __name__ == "__main__": diff --git a/transformer/transformer.py b/transformer/transformer.py index d7c389c..8359bf5 100644 --- a/transformer/transformer.py +++ b/transformer/transformer.py @@ -19,9 +19,7 @@ import numpy as np import paddle.fluid as fluid import paddle.fluid.layers as layers from paddle.fluid.dygraph import Embedding, LayerNorm, Linear, Layer -from paddle.incubate.hapi.model import Model -from paddle.incubate.hapi.loss import Loss -from paddle.incubate.hapi.text import TransformerBeamSearchDecoder, DynamicDecode +from paddle.text import TransformerBeamSearchDecoder, DynamicDecode def position_encoding_init(n_position, d_pos_vec): @@ -498,13 +496,12 @@ class WrapDecoder(Layer): return logits -class CrossEntropyCriterion(Loss): +class CrossEntropyCriterion(Layer): def __init__(self, label_smooth_eps): super(CrossEntropyCriterion, self).__init__() self.label_smooth_eps = label_smooth_eps - def forward(self, outputs, labels): - predict, (label, weights) = outputs[0], labels + def forward(self, predict, label, weights): if self.label_smooth_eps: label = layers.label_smooth( label=layers.one_hot( @@ -523,7 +520,7 @@ class CrossEntropyCriterion(Loss): return avg_cost -class Transformer(Model): +class Transformer(Layer): """ model """ -- GitLab