diff --git a/bert/readme.md b/bert/readme.md
index e367d881ec9e82f2735db47f8cae90fe9b005885..3e7727c6f9c1fdf73331e84b90b4979ee8b8bf7e 100644
--- a/bert/readme.md
+++ b/bert/readme.md
@@ -1,3 +1,5 @@
+**Still under development, to be completed**
+
 1. download data: wget https://paddle-hapi.bj.bcebos.com/data/bert_data.tar.gz
 
 2. unzip data: tar -zvxf bert_data.tar.gz
diff --git a/bert_leveldb/readme.md b/bert_leveldb/readme.md
index fbcb7e70d9e9be7def58f06dc305077c3136076c..714e66463e52c0deecb9848cd898cef140d32a6d 100644
--- a/bert_leveldb/readme.md
+++ b/bert_leveldb/readme.md
@@ -1,3 +1,5 @@
+**Still under development, to be completed**
+
 0. python3.7 -m pip install leveldb
 
 1. download data: wget https://paddle-hapi.bj.bcebos.com/data/bert_data.tar.gz
diff --git a/sentiment_classification/README.md b/sentiment_classification/README.md
index 9de6eb7ecfb4cb557b8296309710c2c7c5240b77..a13c9f61049d2714e1fb334f370587a45756e6be 100644
--- a/sentiment_classification/README.md
+++ b/sentiment_classification/README.md
@@ -1,5 +1,6 @@
-## Introduction
+**Still under development, to be completed**
 
+## Introduction
 Sentiment is a high-level form of intelligent human behavior, and identifying the sentiment polarity of a text requires deep semantic modeling. In addition, sentiment is expressed differently across domains (e.g., catering, sports), so model training needs large-scale data covering many domains. We address both problems with deep-learning-based semantic models and large-scale data mining. For evaluation, we benchmark on the open-source sentiment classification dataset ChnSentiCorp; the results are shown below:
 
 
diff --git a/seq2seq/predict.py b/seq2seq/predict.py
index db8aef1330fb8a33934c17c789563a606fcd5350..39ffd657705fdd993d7d6be59c62ac0f395bd370 100644
--- a/seq2seq/predict.py
+++ b/seq2seq/predict.py
@@ -19,11 +19,12 @@ import random
 from functools import partial
 
 import numpy as np
+import paddle
 import paddle.fluid as fluid
 from paddle.fluid.layers.utils import flatten
 from paddle.fluid.io import DataLoader
+from paddle.static import InputSpec as Input
 
-from paddle.incubate.hapi.model import Input, set_device
 from args import parse_args
 from seq2seq_base import BaseInferModel
 from seq2seq_attn import AttentionInferModel
@@ -48,7 +49,7 @@ def post_process_seq(seq, bos_idx, eos_idx, output_bos=False,
 
 
 def do_predict(args):
-    device = set_device("gpu" if args.use_gpu else "cpu")
+    device = paddle.set_device("gpu" if args.use_gpu else "cpu")
     fluid.enable_dygraph(device) if args.eager_run else None
 
     # define model
@@ -84,19 +85,21 @@
         return_list=True)
 
     model_maker = AttentionInferModel if args.attention else BaseInferModel
-    model = model_maker(
-        args.src_vocab_size,
-        args.tar_vocab_size,
-        args.hidden_size,
-        args.hidden_size,
-        args.num_layers,
-        args.dropout,
-        bos_id=bos_id,
-        eos_id=eos_id,
-        beam_size=args.beam_size,
-        max_out_len=256)
+    model = paddle.Model(
+        model_maker(
+            args.src_vocab_size,
+            args.tar_vocab_size,
+            args.hidden_size,
+            args.hidden_size,
+            args.num_layers,
+            args.dropout,
+            bos_id=bos_id,
+            eos_id=eos_id,
+            beam_size=args.beam_size,
+            max_out_len=256),
+        inputs=inputs)
 
-    model.prepare(inputs=inputs, device=device)
+    model.prepare()
 
     # load the trained model
     assert args.reload_model, (
diff --git a/seq2seq/seq2seq_attn.py b/seq2seq/seq2seq_attn.py
index 0979d68c3a7612349521dab473491c1fb118e287..472efcd0bda992905edeefa73bbc9003a17564ec 100644
--- a/seq2seq/seq2seq_attn.py
+++ b/seq2seq/seq2seq_attn.py
@@ -18,10 +18,7 @@
 from paddle.fluid import ParamAttr
 from paddle.fluid.initializer import UniformInitializer
 from paddle.fluid.dygraph import Embedding, Linear, Layer
 from paddle.fluid.layers import BeamSearchDecoder
-
-from paddle.incubate.hapi.model import Model
-from paddle.incubate.hapi.loss import Loss
-from paddle.incubate.hapi.text import DynamicDecode, RNN, BasicLSTMCell, RNNCell
+from paddle.text import DynamicDecode, RNN, BasicLSTMCell, RNNCell
 
 from seq2seq_base import Encoder
@@ -138,7 +135,7 @@ class Decoder(Layer):
         return predict
 
 
-class AttentionModel(Model):
+class AttentionModel(Layer):
     def __init__(self,
                  src_vocab_size,
                  trg_vocab_size,
diff --git a/seq2seq/seq2seq_base.py b/seq2seq/seq2seq_base.py
index 8d8b5b6299fab04c79a523677cf1a8d00c7d711e..07a0018da23b0ebdd59c6de368871a4f5526831a 100644
--- a/seq2seq/seq2seq_base.py
+++ b/seq2seq/seq2seq_base.py
@@ -18,18 +18,14 @@
 from paddle.fluid import ParamAttr
 from paddle.fluid.initializer import UniformInitializer
 from paddle.fluid.dygraph import Embedding, Linear, Layer
 from paddle.fluid.layers import BeamSearchDecoder
+from paddle.text import DynamicDecode, RNN, BasicLSTMCell, RNNCell
-from paddle.incubate.hapi.model import Model
-from paddle.incubate.hapi.loss import Loss
-from paddle.incubate.hapi.text import DynamicDecode, RNN, BasicLSTMCell, RNNCell
 
-
-class CrossEntropyCriterion(Loss):
+class CrossEntropyCriterion(Layer):
     def __init__(self):
         super(CrossEntropyCriterion, self).__init__()
 
-    def forward(self, outputs, labels):
-        predict, (trg_length, label) = outputs[0], labels
+    def forward(self, predict, trg_length, label):
         # for target padding mask
         mask = layers.sequence_mask(
             trg_length, maxlen=layers.shape(predict)[1], dtype=predict.dtype)
@@ -140,7 +136,7 @@ class Decoder(Layer):
         return predict
 
 
-class BaseModel(Model):
+class BaseModel(Layer):
     def __init__(self,
                  src_vocab_size,
                  trg_vocab_size,
diff --git a/seq2seq/train.py b/seq2seq/train.py
index b065ff59839fb2a8ddad79b268471efc82a6fd41..104c2f773fc7cfb5a9523f18a6f361969874bb64 100644
--- a/seq2seq/train.py
+++ b/seq2seq/train.py
@@ -15,14 +15,15 @@
 import logging
 import os
 import random
+from args import parse_args
 from functools import partial
 
 import numpy as np
+import paddle
 import paddle.fluid as fluid
 from paddle.fluid.io import DataLoader
+from paddle.static import InputSpec as Input
 
-from paddle.incubate.hapi.model import Input, set_device
-from args import parse_args
 from seq2seq_base import BaseModel, CrossEntropyCriterion
 from seq2seq_attn import AttentionModel
 from reader import create_data_loader
@@ -30,7 +31,7 @@ from utility import PPL, TrainCallback, get_model_cls
 
 
 def do_train(args):
-    device = set_device("gpu" if args.use_gpu else "cpu")
+    device = paddle.set_device("gpu" if args.use_gpu else "cpu")
     fluid.enable_dygraph(device) if args.eager_run else None
 
     if args.enable_ce:
@@ -58,9 +59,11 @@
 
     model_maker = get_model_cls(
         AttentionModel) if args.attention else get_model_cls(BaseModel)
-    model = model_maker(args.src_vocab_size, args.tar_vocab_size,
-                        args.hidden_size, args.hidden_size, args.num_layers,
-                        args.dropout)
+    model = paddle.Model(
+        model_maker(args.src_vocab_size, args.tar_vocab_size, args.hidden_size,
+                    args.hidden_size, args.num_layers, args.dropout),
+        inputs=inputs,
+        labels=labels)
     grad_clip = fluid.clip.GradientClipByGlobalNorm(
         clip_norm=args.max_grad_norm)
     optimizer = fluid.optimizer.Adam(
@@ -69,13 +72,7 @@
         grad_clip=grad_clip)
     ppl_metric = PPL(reset_freq=100)  # ppl for every 100 batches
 
-    model.prepare(
-        optimizer,
-        CrossEntropyCriterion(),
-        ppl_metric,
-        inputs=inputs,
-        labels=labels,
-        device=device)
+    model.prepare(optimizer, CrossEntropyCriterion(), ppl_metric)
     model.fit(train_data=train_loader,
               eval_data=eval_loader,
               epochs=args.max_epoch,
diff --git a/seq2seq/utility.py b/seq2seq/utility.py
index cb2114c476f4601eebb595c9fef6143d3632454b..fc446efab265932a756fda4c46c8c80f752af6bb 100644
--- a/seq2seq/utility.py
+++ b/seq2seq/utility.py
@@ -15,14 +15,13 @@
 import math
 import functools
 
+import paddle
 import paddle.fluid as fluid
+from paddle.metric import Metric
+from paddle.text import BasicLSTMCell
 
-from paddle.incubate.hapi.metrics import Metric
-from paddle.incubate.hapi.callbacks import ProgBarLogger
-from paddle.incubate.hapi.text import BasicLSTMCell
 
-
-class TrainCallback(ProgBarLogger):
+class TrainCallback(paddle.callbacks.ProgBarLogger):
     def __init__(self, ppl, log_freq, verbose=2):
         super(TrainCallback, self).__init__(log_freq, verbose)
         self.ppl = ppl
@@ -58,7 +57,7 @@ class PPL(Metric):
         self.reset_freq = reset_freq
         self.reset()
 
-    def add_metric_op(self, pred, seq_length, label):
+    def compute(self, pred, seq_length, label):
         word_num = fluid.layers.reduce_sum(seq_length)
         return word_num
 
diff --git a/sequence_tagging/eval.py b/sequence_tagging/eval.py
index 4163ce7922d7e503c733724d3d56baae62d823a5..a192a23375abe968301106a02c843823b89081ae 100644
--- a/sequence_tagging/eval.py
+++ b/sequence_tagging/eval.py
@@ -18,9 +18,10 @@ SequenceTagging eval structure
 from __future__ import division
 from __future__ import print_function
 
+import paddle
 import paddle.fluid as fluid
 from paddle.fluid.layers.utils import flatten
-from paddle.incubate.hapi.model import Input, set_device
+from paddle.static import InputSpec as Input
 
 from sequence_tagging import SeqTagging, LacLoss, ChunkEval
 from reader import LacDataset, LacDataLoader
@@ -29,7 +30,7 @@ from utils.configure import PDConfig
 
 
 def main(args):
-    place = set_device(args.device)
+    place = paddle.set_device(args.device)
     fluid.enable_dygraph(place) if args.dynamic else None
 
     inputs = [
@@ -45,14 +46,14 @@
     vocab_size = dataset.vocab_size
     num_labels = dataset.num_labels
 
-    model = SeqTagging(args, vocab_size, num_labels, mode="test")
+    model = paddle.Model(
+        SeqTagging(
+            args, vocab_size, num_labels, mode="test"),
+        inputs=inputs,
+        labels=labels)
     model.mode = "test"
-    model.prepare(
-        metrics=ChunkEval(num_labels),
-        inputs=inputs,
-        labels=labels,
-        device=place)
+    model.prepare(metrics=ChunkEval(num_labels))
 
     model.load(args.init_from_checkpoint, skip_mismatch=True)
 
     eval_result = model.evaluate(
diff --git a/sequence_tagging/predict.py b/sequence_tagging/predict.py
index 583b41c0642a561222affbd1db8666fc0e769c62..4c55ff7bfc694f45940e9d9e2614087c6f3d2c73 100644
--- a/sequence_tagging/predict.py
+++ b/sequence_tagging/predict.py
@@ -20,9 +20,10 @@ from __future__ import print_function
 
 import six
 
+import paddle
 import paddle.fluid as fluid
 from paddle.fluid.layers.utils import flatten
-from paddle.incubate.hapi.model import Input, set_device
+from paddle.static import InputSpec as Input
 
 from sequence_tagging import SeqTagging, LacLoss, ChunkEval
 from reader import LacDataset, LacDataLoader
@@ -31,7 +32,7 @@ from utils.configure import PDConfig
 
 
 def main(args):
-    place = set_device(args.device)
+    place = paddle.set_device(args.device)
     fluid.enable_dygraph(place) if args.dynamic else None
 
     inputs = [
@@ -46,10 +47,13 @@
     vocab_size = dataset.vocab_size
     num_labels = dataset.num_labels
 
-    model = SeqTagging(args, vocab_size, num_labels, mode="predict")
+    model = paddle.Model(
+        SeqTagging(
+            args, vocab_size, num_labels, mode="predict"),
+        inputs=inputs)
     model.mode = "test"
 
-    model.prepare(inputs=inputs)
+    model.prepare()
 
     model.load(args.init_from_checkpoint, skip_mismatch=True)
 
diff --git a/sequence_tagging/sequence_tagging.py b/sequence_tagging/sequence_tagging.py
index 3392a61ffa417bbc3b7d6c1bf7e4b5043e6f0ffa..965116256d9d61baa2cfb59e6c8fbe2ad47d3b09 100644
--- a/sequence_tagging/sequence_tagging.py
+++ b/sequence_tagging/sequence_tagging.py
@@ -25,17 +25,16 @@
 import math
 import argparse
 import numpy as np
+import paddle
 import paddle.fluid as fluid
-from paddle.incubate.hapi.metrics import Metric
-from paddle.incubate.hapi.model import Model
-from paddle.incubate.hapi.loss import Loss
-from paddle.incubate.hapi.text import SequenceTagging
+from paddle.metric import Metric
+from paddle.text import SequenceTagging
 
 from utils.check import check_gpu, check_version
 from utils.configure import PDConfig
 
 
-class SeqTagging(Model):
+class SeqTagging(fluid.dygraph.Layer):
     def __init__(self, args, vocab_size, num_labels, length=None,
                  mode="train"):
         super(SeqTagging, self).__init__()
@@ -131,13 +130,13 @@ class Chunk_eval(fluid.dygraph.Layer):
         return (num_infer_chunks, num_label_chunks, num_correct_chunks)
 
 
-class LacLoss(Loss):
+class LacLoss(fluid.dygraph.Layer):
     def __init__(self):
         super(LacLoss, self).__init__()
         pass
 
-    def forward(self, outputs, labels):
-        avg_cost = outputs[1]
+    def forward(self, *args):
+        avg_cost = args[1]
         return avg_cost
 
 
@@ -149,7 +148,7 @@
             int(math.ceil((num_labels - 1) / 2.0)), "IOB")
         self.reset()
 
-    def add_metric_op(self, *args):
+    def compute(self, *args):
         crf_decode = args[0]
         lengths = args[2]
         label = args[3]
diff --git a/sequence_tagging/train.py b/sequence_tagging/train.py
index 5626838b2f02d6731bc8f31004bf920d149dedb9..670a466d2704240882bb0e624f6e892c1f883cbe 100644
--- a/sequence_tagging/train.py
+++ b/sequence_tagging/train.py
@@ -18,9 +18,10 @@ SequenceTagging network structure
 from __future__ import division
 from __future__ import print_function
 
+import paddle
 import paddle.fluid as fluid
 from paddle.fluid.optimizer import AdamOptimizer
-from paddle.incubate.hapi.model import Input, set_device
+from paddle.static import InputSpec as Input
 
 from sequence_tagging import SeqTagging, LacLoss, ChunkEval
 from reader import LacDataset, LacDataLoader
@@ -29,7 +30,7 @@ from utils.configure import PDConfig
 
 
 def main(args):
-    place = set_device(args.device)
+    place = paddle.set_device(args.device)
     fluid.enable_dygraph(place) if args.dynamic else None
 
    inputs = [
@@ -48,19 +49,17 @@
     vocab_size = dataset.vocab_size
     num_labels = dataset.num_labels
 
-    model = SeqTagging(args, vocab_size, num_labels, mode="train")
+    model = paddle.Model(
+        SeqTagging(
+            args, vocab_size, num_labels, mode="train"),
+        inputs=inputs,
+        labels=labels)
 
     optim = AdamOptimizer(
         learning_rate=args.base_learning_rate,
         parameter_list=model.parameters())
 
-    model.prepare(
-        optim,
-        LacLoss(),
-        ChunkEval(num_labels),
-        inputs=inputs,
-        labels=labels,
-        device=args.device)
+    model.prepare(optim, LacLoss(), ChunkEval(num_labels))
 
     if args.init_from_checkpoint:
         model.load(args.init_from_checkpoint)
diff --git a/transformer/predict.py b/transformer/predict.py
index 5521d6c78478cf72a7a78d91d508a061b4f39cc3..87e40d7e8511f9508833a66958cffe5e7e3e4b32 100644
--- a/transformer/predict.py
+++ b/transformer/predict.py
@@ -21,11 +21,11 @@
 import paddle
 import paddle.fluid as fluid
 from paddle.io import DataLoader
 from paddle.fluid.layers.utils import flatten
+from paddle.static import InputSpec as Input
 
 from utils.configure import PDConfig
 from utils.check import check_gpu, check_version
 
-from paddle.incubate.hapi.model import Input, set_device
 from reader import prepare_infer_input, Seq2SeqDataset, Seq2SeqBatchSampler
 from transformer import InferTransformer
@@ -48,7 +48,7 @@ def post_process_seq(seq, bos_idx, eos_idx, output_bos=False,
 
 
 def do_predict(args):
-    device = set_device("gpu" if args.use_cuda else "cpu")
+    device = paddle.set_device("gpu" if args.use_cuda else "cpu")
     fluid.enable_dygraph(device) if args.eager_run else None
 
     inputs = [
@@ -99,37 +99,39 @@
         return_list=True)
 
     # define model
-    transformer = InferTransformer(
-        args.src_vocab_size,
-        args.trg_vocab_size,
-        args.max_length + 1,
-        args.n_layer,
-        args.n_head,
-        args.d_key,
-        args.d_value,
-        args.d_model,
-        args.d_inner_hid,
-        args.prepostprocess_dropout,
-        args.attention_dropout,
-        args.relu_dropout,
-        args.preprocess_cmd,
-        args.postprocess_cmd,
-        args.weight_sharing,
-        args.bos_idx,
-        args.eos_idx,
-        beam_size=args.beam_size,
-        max_out_len=args.max_out_len)
-    transformer.prepare(inputs=inputs, device=device)
+    model = paddle.Model(
+        InferTransformer(
+            args.src_vocab_size,
+            args.trg_vocab_size,
+            args.max_length + 1,
+            args.n_layer,
+            args.n_head,
+            args.d_key,
+            args.d_value,
+            args.d_model,
+            args.d_inner_hid,
+            args.prepostprocess_dropout,
+            args.attention_dropout,
+            args.relu_dropout,
+            args.preprocess_cmd,
+            args.postprocess_cmd,
+            args.weight_sharing,
+            args.bos_idx,
+            args.eos_idx,
+            beam_size=args.beam_size,
+            max_out_len=args.max_out_len),
+        inputs)
+    model.prepare()
 
     # load the trained model
     assert args.init_from_params, (
         "Please set init_from_params to load the infer model.")
-    transformer.load(args.init_from_params)
+    model.load(args.init_from_params)
 
     # TODO: use model.predict when support variant length
     f = open(args.output_file, "wb")
     for data in data_loader():
-        finished_seq = transformer.test_batch(inputs=flatten(data))[0]
+        finished_seq = model.test_batch(inputs=flatten(data))[0]
         finished_seq = np.transpose(finished_seq, [0, 2, 1])
         for ins in finished_seq:
             for beam_idx, beam in enumerate(ins):
diff --git a/transformer/train.py b/transformer/train.py
index 90f54d01a85e811bee7c5cc6411277466639f4be..23aeece1893bc1a444c92055b27b5fbb6c15a318 100644
--- a/transformer/train.py
+++ b/transformer/train.py
@@ -19,17 +19,16 @@ import numpy as np
 import paddle
 import paddle.fluid as fluid
 from paddle.io import DataLoader
+from paddle.static import InputSpec as Input
 
 from utils.configure import PDConfig
 from utils.check import check_gpu, check_version
 
-from paddle.incubate.hapi.model import Input, set_device
-from paddle.incubate.hapi.callbacks import ProgBarLogger
 from reader import create_data_loader
 from transformer import Transformer, CrossEntropyCriterion
 
 
-class TrainCallback(ProgBarLogger):
+class TrainCallback(paddle.callbacks.ProgBarLogger):
     def __init__(self,
                  args,
                  verbose=2,
@@ -75,7 +74,7 @@ class TrainCallback(ProgBarLogger):
 
 
 def do_train(args):
-    device = set_device("gpu" if args.use_cuda else "cpu")
+    device = paddle.set_device("gpu" if args.use_cuda else "cpu")
     fluid.enable_dygraph(device) if args.eager_run else None
 
     # set seed for CE
@@ -119,14 +118,16 @@
      eval_loader, eval_steps_fn) = create_data_loader(args, device)
 
     # define model
-    transformer = Transformer(
-        args.src_vocab_size, args.trg_vocab_size, args.max_length + 1,
-        args.n_layer, args.n_head, args.d_key, args.d_value, args.d_model,
-        args.d_inner_hid, args.prepostprocess_dropout, args.attention_dropout,
-        args.relu_dropout, args.preprocess_cmd, args.postprocess_cmd,
-        args.weight_sharing, args.bos_idx, args.eos_idx)
-
-    transformer.prepare(
+    model = paddle.Model(
+        Transformer(args.src_vocab_size, args.trg_vocab_size,
+                    args.max_length + 1, args.n_layer, args.n_head, args.d_key,
+                    args.d_value, args.d_model, args.d_inner_hid,
+                    args.prepostprocess_dropout, args.attention_dropout,
+                    args.relu_dropout, args.preprocess_cmd,
+                    args.postprocess_cmd, args.weight_sharing, args.bos_idx,
+                    args.eos_idx), inputs, labels)
+
+    model.prepare(
         fluid.optimizer.Adam(
             learning_rate=fluid.layers.noam_decay(
                 args.d_model,
@@ -135,32 +136,29 @@
             beta1=args.beta1,
             beta2=args.beta2,
             epsilon=float(args.eps),
-            parameter_list=transformer.parameters()),
-        CrossEntropyCriterion(args.label_smooth_eps),
-        inputs=inputs,
-        labels=labels,
-        device=device)
+            parameter_list=model.parameters()),
+        CrossEntropyCriterion(args.label_smooth_eps))
 
     ## init from some checkpoint, to resume the previous training
     if args.init_from_checkpoint:
-        transformer.load(args.init_from_checkpoint)
+        model.load(args.init_from_checkpoint)
 
     ## init from some pretrain models, to better solve the current task
     if args.init_from_pretrain_model:
-        transformer.load(args.init_from_pretrain_model, reset_optimizer=True)
+        model.load(args.init_from_pretrain_model, reset_optimizer=True)
 
     # model train
-    transformer.fit(train_data=train_loader,
-                    eval_data=eval_loader,
-                    epochs=args.epoch,
-                    eval_freq=1,
-                    save_freq=1,
-                    save_dir=args.save_model,
-                    callbacks=[
-                        TrainCallback(
-                            args,
-                            train_steps_fn=train_steps_fn,
-                            eval_steps_fn=eval_steps_fn)
-                    ])
+    model.fit(train_data=train_loader,
+              eval_data=eval_loader,
+              epochs=args.epoch,
+              eval_freq=1,
+              save_freq=1,
+              save_dir=args.save_model,
+              callbacks=[
+                  TrainCallback(
+                      args,
+                      train_steps_fn=train_steps_fn,
+                      eval_steps_fn=eval_steps_fn)
+              ])
 
 
 if __name__ == "__main__":
diff --git a/transformer/transformer.py b/transformer/transformer.py
index d7c389c96bce47273537158ca21d8a571991c9f3..8359bf5b6a60de531aed8bff99ad0f021f7720d6 100644
--- a/transformer/transformer.py
+++ b/transformer/transformer.py
@@ -19,9 +19,7 @@ import numpy as np
 import paddle.fluid as fluid
 import paddle.fluid.layers as layers
 from paddle.fluid.dygraph import Embedding, LayerNorm, Linear, Layer
-from paddle.incubate.hapi.model import Model
-from paddle.incubate.hapi.loss import Loss
-from paddle.incubate.hapi.text import TransformerBeamSearchDecoder, DynamicDecode
+from paddle.text import TransformerBeamSearchDecoder, DynamicDecode
 
 
 def position_encoding_init(n_position, d_pos_vec):
@@ -498,13 +496,12 @@ class WrapDecoder(Layer):
         return logits
 
 
-class CrossEntropyCriterion(Loss):
+class CrossEntropyCriterion(Layer):
     def __init__(self, label_smooth_eps):
         super(CrossEntropyCriterion, self).__init__()
         self.label_smooth_eps = label_smooth_eps
 
-    def forward(self, outputs, labels):
-        predict, (label, weights) = outputs[0], labels
+    def forward(self, predict, label, weights):
         if self.label_smooth_eps:
             label = layers.label_smooth(
                 label=layers.one_hot(
@@ -523,7 +520,7 @@
         return avg_cost
 
 
-class Transformer(Model):
+class Transformer(Layer):
     """
     model
     """
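
Note: every file in this patch follows the same migration pattern. The network stays a plain `Layer`, while `paddle.Model` together with `paddle.static.InputSpec` takes over what `paddle.incubate.hapi.model.Model`, `Input`, and `set_device` used to do, and `prepare()` no longer receives `inputs`/`labels`/`device`. The sketch below is a minimal, self-contained illustration of that wiring under an assumed Paddle 2.x install; `TinyClassifier`, its layer sizes, and the input shapes are hypothetical and are not part of this repo.

```python
# Minimal sketch of the Layer + paddle.Model wiring used throughout this patch.
# TinyClassifier is a made-up stand-in for the real networks (SeqTagging,
# Transformer, BaseModel, ...); only the surrounding API calls mirror the diff.
import paddle
from paddle.static import InputSpec


class TinyClassifier(paddle.nn.Layer):
    def __init__(self, vocab_size=100, num_classes=2):
        super(TinyClassifier, self).__init__()
        self.emb = paddle.nn.Embedding(vocab_size, 32)
        self.fc = paddle.nn.Linear(32, num_classes)

    def forward(self, word_ids):
        # average the token embeddings, then classify
        return self.fc(self.emb(word_ids).mean(axis=1))


# InputSpec replaces the old hapi Input; shapes here are illustrative only.
inputs = [InputSpec([None, None], "int64", name="word_ids")]
labels = [InputSpec([None, 1], "int64", name="label")]

# Wrap the Layer in paddle.Model; prepare() now only takes optimizer/loss/metrics.
model = paddle.Model(TinyClassifier(), inputs=inputs, labels=labels)
model.prepare(
    paddle.optimizer.Adam(parameters=model.parameters()),
    paddle.nn.CrossEntropyLoss(),
    paddle.metric.Accuracy())
# model.fit(train_dataset, eval_dataset, epochs=1, batch_size=32)
```

The same wrap-then-prepare sequence is what replaces the removed `model.prepare(..., inputs=..., labels=..., device=...)` and `transformer.prepare(...)` calls in the hunks above.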