提交 16a3a921 编写于 作者: G guosheng

Update Transformer, seq2seq, sequence_tagging to adapt to 2.0-beta hapi apis.

上级 2898c3fa
**仍在开发中,待完成**
1. download data: wget https://paddle-hapi.bj.bcebos.com/data/bert_data.tar.gz
2. unzip data: tar -zvxf bert_data.tar.gz
......
**仍在开发中,待完成**
0. python3.7 -m pip install leveldb
1. download data: wget https://paddle-hapi.bj.bcebos.com/data/bert_data.tar.gz
......
**仍在开发中,待完成**
## 简介
情感是人类的一种高级智能行为,为了识别文本的情感倾向,需要深入的语义建模。另外,不同领域(如餐饮、体育)在情感的表达上各不相同,因而需要有大规模覆盖各个领域的数据进行模型训练。为此,我们通过基于深度学习的语义模型和大规模数据挖掘解决上述两个问题。效果上,我们基于开源情感倾向分类数据集ChnSentiCorp进行评测。具体数据如下所示:
......
...@@ -19,11 +19,12 @@ import random ...@@ -19,11 +19,12 @@ import random
from functools import partial from functools import partial
import numpy as np import numpy as np
import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.layers.utils import flatten from paddle.fluid.layers.utils import flatten
from paddle.fluid.io import DataLoader from paddle.fluid.io import DataLoader
from paddle.static import InputSpec as Input
from paddle.incubate.hapi.model import Input, set_device
from args import parse_args from args import parse_args
from seq2seq_base import BaseInferModel from seq2seq_base import BaseInferModel
from seq2seq_attn import AttentionInferModel from seq2seq_attn import AttentionInferModel
...@@ -48,7 +49,7 @@ def post_process_seq(seq, bos_idx, eos_idx, output_bos=False, ...@@ -48,7 +49,7 @@ def post_process_seq(seq, bos_idx, eos_idx, output_bos=False,
def do_predict(args): def do_predict(args):
device = set_device("gpu" if args.use_gpu else "cpu") device = paddle.set_device("gpu" if args.use_gpu else "cpu")
fluid.enable_dygraph(device) if args.eager_run else None fluid.enable_dygraph(device) if args.eager_run else None
# define model # define model
...@@ -84,7 +85,8 @@ def do_predict(args): ...@@ -84,7 +85,8 @@ def do_predict(args):
return_list=True) return_list=True)
model_maker = AttentionInferModel if args.attention else BaseInferModel model_maker = AttentionInferModel if args.attention else BaseInferModel
model = model_maker( model = paddle.Model(
model_maker(
args.src_vocab_size, args.src_vocab_size,
args.tar_vocab_size, args.tar_vocab_size,
args.hidden_size, args.hidden_size,
...@@ -94,9 +96,10 @@ def do_predict(args): ...@@ -94,9 +96,10 @@ def do_predict(args):
bos_id=bos_id, bos_id=bos_id,
eos_id=eos_id, eos_id=eos_id,
beam_size=args.beam_size, beam_size=args.beam_size,
max_out_len=256) max_out_len=256),
inputs=inputs)
model.prepare(inputs=inputs, device=device) model.prepare()
# load the trained model # load the trained model
assert args.reload_model, ( assert args.reload_model, (
......
...@@ -18,10 +18,7 @@ from paddle.fluid import ParamAttr ...@@ -18,10 +18,7 @@ from paddle.fluid import ParamAttr
from paddle.fluid.initializer import UniformInitializer from paddle.fluid.initializer import UniformInitializer
from paddle.fluid.dygraph import Embedding, Linear, Layer from paddle.fluid.dygraph import Embedding, Linear, Layer
from paddle.fluid.layers import BeamSearchDecoder from paddle.fluid.layers import BeamSearchDecoder
from paddle.text import DynamicDecode, RNN, BasicLSTMCell, RNNCell
from paddle.incubate.hapi.model import Model
from paddle.incubate.hapi.loss import Loss
from paddle.incubate.hapi.text import DynamicDecode, RNN, BasicLSTMCell, RNNCell
from seq2seq_base import Encoder from seq2seq_base import Encoder
...@@ -138,7 +135,7 @@ class Decoder(Layer): ...@@ -138,7 +135,7 @@ class Decoder(Layer):
return predict return predict
class AttentionModel(Model): class AttentionModel(Layer):
def __init__(self, def __init__(self,
src_vocab_size, src_vocab_size,
trg_vocab_size, trg_vocab_size,
......
...@@ -18,18 +18,14 @@ from paddle.fluid import ParamAttr ...@@ -18,18 +18,14 @@ from paddle.fluid import ParamAttr
from paddle.fluid.initializer import UniformInitializer from paddle.fluid.initializer import UniformInitializer
from paddle.fluid.dygraph import Embedding, Linear, Layer from paddle.fluid.dygraph import Embedding, Linear, Layer
from paddle.fluid.layers import BeamSearchDecoder from paddle.fluid.layers import BeamSearchDecoder
from paddle.text import DynamicDecode, RNN, BasicLSTMCell, RNNCell
from paddle.incubate.hapi.model import Model
from paddle.incubate.hapi.loss import Loss
from paddle.incubate.hapi.text import DynamicDecode, RNN, BasicLSTMCell, RNNCell
class CrossEntropyCriterion(Layer):
class CrossEntropyCriterion(Loss):
def __init__(self): def __init__(self):
super(CrossEntropyCriterion, self).__init__() super(CrossEntropyCriterion, self).__init__()
def forward(self, outputs, labels): def forward(self, predict, trg_length, label):
predict, (trg_length, label) = outputs[0], labels
# for target padding mask # for target padding mask
mask = layers.sequence_mask( mask = layers.sequence_mask(
trg_length, maxlen=layers.shape(predict)[1], dtype=predict.dtype) trg_length, maxlen=layers.shape(predict)[1], dtype=predict.dtype)
...@@ -140,7 +136,7 @@ class Decoder(Layer): ...@@ -140,7 +136,7 @@ class Decoder(Layer):
return predict return predict
class BaseModel(Model): class BaseModel(Layer):
def __init__(self, def __init__(self,
src_vocab_size, src_vocab_size,
trg_vocab_size, trg_vocab_size,
......
...@@ -15,14 +15,15 @@ ...@@ -15,14 +15,15 @@
import logging import logging
import os import os
import random import random
from args import parse_args
from functools import partial from functools import partial
import numpy as np import numpy as np
import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.io import DataLoader from paddle.fluid.io import DataLoader
from paddle.static import InputSpec as Input
from paddle.incubate.hapi.model import Input, set_device
from args import parse_args
from seq2seq_base import BaseModel, CrossEntropyCriterion from seq2seq_base import BaseModel, CrossEntropyCriterion
from seq2seq_attn import AttentionModel from seq2seq_attn import AttentionModel
from reader import create_data_loader from reader import create_data_loader
...@@ -30,7 +31,7 @@ from utility import PPL, TrainCallback, get_model_cls ...@@ -30,7 +31,7 @@ from utility import PPL, TrainCallback, get_model_cls
def do_train(args): def do_train(args):
device = set_device("gpu" if args.use_gpu else "cpu") device = paddle.set_device("gpu" if args.use_gpu else "cpu")
fluid.enable_dygraph(device) if args.eager_run else None fluid.enable_dygraph(device) if args.eager_run else None
if args.enable_ce: if args.enable_ce:
...@@ -58,9 +59,11 @@ def do_train(args): ...@@ -58,9 +59,11 @@ def do_train(args):
model_maker = get_model_cls( model_maker = get_model_cls(
AttentionModel) if args.attention else get_model_cls(BaseModel) AttentionModel) if args.attention else get_model_cls(BaseModel)
model = model_maker(args.src_vocab_size, args.tar_vocab_size, model = paddle.Model(
args.hidden_size, args.hidden_size, args.num_layers, model_maker(args.src_vocab_size, args.tar_vocab_size, args.hidden_size,
args.dropout) args.hidden_size, args.num_layers, args.dropout),
inputs=inputs,
labels=labels)
grad_clip = fluid.clip.GradientClipByGlobalNorm( grad_clip = fluid.clip.GradientClipByGlobalNorm(
clip_norm=args.max_grad_norm) clip_norm=args.max_grad_norm)
optimizer = fluid.optimizer.Adam( optimizer = fluid.optimizer.Adam(
...@@ -69,13 +72,7 @@ def do_train(args): ...@@ -69,13 +72,7 @@ def do_train(args):
grad_clip=grad_clip) grad_clip=grad_clip)
ppl_metric = PPL(reset_freq=100) # ppl for every 100 batches ppl_metric = PPL(reset_freq=100) # ppl for every 100 batches
model.prepare( model.prepare(optimizer, CrossEntropyCriterion(), ppl_metric)
optimizer,
CrossEntropyCriterion(),
ppl_metric,
inputs=inputs,
labels=labels,
device=device)
model.fit(train_data=train_loader, model.fit(train_data=train_loader,
eval_data=eval_loader, eval_data=eval_loader,
epochs=args.max_epoch, epochs=args.max_epoch,
......
...@@ -15,14 +15,13 @@ ...@@ -15,14 +15,13 @@
import math import math
import functools import functools
import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.metric import Metric
from paddle.text import BasicLSTMCell
from paddle.incubate.hapi.metrics import Metric
from paddle.incubate.hapi.callbacks import ProgBarLogger
from paddle.incubate.hapi.text import BasicLSTMCell
class TrainCallback(paddle.callbacks.ProgBarLogger):
class TrainCallback(ProgBarLogger):
def __init__(self, ppl, log_freq, verbose=2): def __init__(self, ppl, log_freq, verbose=2):
super(TrainCallback, self).__init__(log_freq, verbose) super(TrainCallback, self).__init__(log_freq, verbose)
self.ppl = ppl self.ppl = ppl
...@@ -58,7 +57,7 @@ class PPL(Metric): ...@@ -58,7 +57,7 @@ class PPL(Metric):
self.reset_freq = reset_freq self.reset_freq = reset_freq
self.reset() self.reset()
def add_metric_op(self, pred, seq_length, label): def compute(self, pred, seq_length, label):
word_num = fluid.layers.reduce_sum(seq_length) word_num = fluid.layers.reduce_sum(seq_length)
return word_num return word_num
......
...@@ -18,9 +18,10 @@ SequenceTagging eval structure ...@@ -18,9 +18,10 @@ SequenceTagging eval structure
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.layers.utils import flatten from paddle.fluid.layers.utils import flatten
from paddle.incubate.hapi.model import Input, set_device from paddle.static import InputSpec as Input
from sequence_tagging import SeqTagging, LacLoss, ChunkEval from sequence_tagging import SeqTagging, LacLoss, ChunkEval
from reader import LacDataset, LacDataLoader from reader import LacDataset, LacDataLoader
...@@ -29,7 +30,7 @@ from utils.configure import PDConfig ...@@ -29,7 +30,7 @@ from utils.configure import PDConfig
def main(args): def main(args):
place = set_device(args.device) place = paddle.set_device(args.device)
fluid.enable_dygraph(place) if args.dynamic else None fluid.enable_dygraph(place) if args.dynamic else None
inputs = [ inputs = [
...@@ -45,14 +46,14 @@ def main(args): ...@@ -45,14 +46,14 @@ def main(args):
vocab_size = dataset.vocab_size vocab_size = dataset.vocab_size
num_labels = dataset.num_labels num_labels = dataset.num_labels
model = SeqTagging(args, vocab_size, num_labels, mode="test") model = paddle.Model(
SeqTagging(
args, vocab_size, num_labels, mode="test"),
inputs=inputs,
labels=labels)
model.mode = "test" model.mode = "test"
model.prepare( model.prepare(metrics=ChunkEval(num_labels))
metrics=ChunkEval(num_labels),
inputs=inputs,
labels=labels,
device=place)
model.load(args.init_from_checkpoint, skip_mismatch=True) model.load(args.init_from_checkpoint, skip_mismatch=True)
eval_result = model.evaluate( eval_result = model.evaluate(
......
...@@ -20,9 +20,10 @@ from __future__ import print_function ...@@ -20,9 +20,10 @@ from __future__ import print_function
import six import six
import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.layers.utils import flatten from paddle.fluid.layers.utils import flatten
from paddle.incubate.hapi.model import Input, set_device from paddle.static import InputSpec as Input
from sequence_tagging import SeqTagging, LacLoss, ChunkEval from sequence_tagging import SeqTagging, LacLoss, ChunkEval
from reader import LacDataset, LacDataLoader from reader import LacDataset, LacDataLoader
...@@ -31,7 +32,7 @@ from utils.configure import PDConfig ...@@ -31,7 +32,7 @@ from utils.configure import PDConfig
def main(args): def main(args):
place = set_device(args.device) place = paddle.set_device(args.device)
fluid.enable_dygraph(place) if args.dynamic else None fluid.enable_dygraph(place) if args.dynamic else None
inputs = [ inputs = [
...@@ -46,10 +47,13 @@ def main(args): ...@@ -46,10 +47,13 @@ def main(args):
vocab_size = dataset.vocab_size vocab_size = dataset.vocab_size
num_labels = dataset.num_labels num_labels = dataset.num_labels
model = SeqTagging(args, vocab_size, num_labels, mode="predict") model = paddle.Model(
SeqTagging(
args, vocab_size, num_labels, mode="predict"),
inputs=inputs)
model.mode = "test" model.mode = "test"
model.prepare(inputs=inputs) model.prepare()
model.load(args.init_from_checkpoint, skip_mismatch=True) model.load(args.init_from_checkpoint, skip_mismatch=True)
......
...@@ -25,17 +25,16 @@ import math ...@@ -25,17 +25,16 @@ import math
import argparse import argparse
import numpy as np import numpy as np
import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.incubate.hapi.metrics import Metric from paddle.metric import Metric
from paddle.incubate.hapi.model import Model from paddle.text import SequenceTagging
from paddle.incubate.hapi.loss import Loss
from paddle.incubate.hapi.text import SequenceTagging
from utils.check import check_gpu, check_version from utils.check import check_gpu, check_version
from utils.configure import PDConfig from utils.configure import PDConfig
class SeqTagging(Model): class SeqTagging(fluid.dygraph.Layer):
def __init__(self, args, vocab_size, num_labels, length=None, def __init__(self, args, vocab_size, num_labels, length=None,
mode="train"): mode="train"):
super(SeqTagging, self).__init__() super(SeqTagging, self).__init__()
...@@ -131,13 +130,13 @@ class Chunk_eval(fluid.dygraph.Layer): ...@@ -131,13 +130,13 @@ class Chunk_eval(fluid.dygraph.Layer):
return (num_infer_chunks, num_label_chunks, num_correct_chunks) return (num_infer_chunks, num_label_chunks, num_correct_chunks)
class LacLoss(Loss): class LacLoss(fluid.dygraph.Layer):
def __init__(self): def __init__(self):
super(LacLoss, self).__init__() super(LacLoss, self).__init__()
pass pass
def forward(self, outputs, labels): def forward(self, *args):
avg_cost = outputs[1] avg_cost = args[1]
return avg_cost return avg_cost
...@@ -149,7 +148,7 @@ class ChunkEval(Metric): ...@@ -149,7 +148,7 @@ class ChunkEval(Metric):
int(math.ceil((num_labels - 1) / 2.0)), "IOB") int(math.ceil((num_labels - 1) / 2.0)), "IOB")
self.reset() self.reset()
def add_metric_op(self, *args): def compute(self, *args):
crf_decode = args[0] crf_decode = args[0]
lengths = args[2] lengths = args[2]
label = args[3] label = args[3]
......
...@@ -18,9 +18,10 @@ SequenceTagging network structure ...@@ -18,9 +18,10 @@ SequenceTagging network structure
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.optimizer import AdamOptimizer from paddle.fluid.optimizer import AdamOptimizer
from paddle.incubate.hapi.model import Input, set_device from paddle.static import InputSpec as Input
from sequence_tagging import SeqTagging, LacLoss, ChunkEval from sequence_tagging import SeqTagging, LacLoss, ChunkEval
from reader import LacDataset, LacDataLoader from reader import LacDataset, LacDataLoader
...@@ -29,7 +30,7 @@ from utils.configure import PDConfig ...@@ -29,7 +30,7 @@ from utils.configure import PDConfig
def main(args): def main(args):
place = set_device(args.device) place = paddle.set_device(args.device)
fluid.enable_dygraph(place) if args.dynamic else None fluid.enable_dygraph(place) if args.dynamic else None
inputs = [ inputs = [
...@@ -48,19 +49,17 @@ def main(args): ...@@ -48,19 +49,17 @@ def main(args):
vocab_size = dataset.vocab_size vocab_size = dataset.vocab_size
num_labels = dataset.num_labels num_labels = dataset.num_labels
model = SeqTagging(args, vocab_size, num_labels, mode="train") model = paddle.Model(
SeqTagging(
args, vocab_size, num_labels, mode="train"),
inputs=inputs,
labels=labels)
optim = AdamOptimizer( optim = AdamOptimizer(
learning_rate=args.base_learning_rate, learning_rate=args.base_learning_rate,
parameter_list=model.parameters()) parameter_list=model.parameters())
model.prepare( model.prepare(optim, LacLoss(), ChunkEval(num_labels))
optim,
LacLoss(),
ChunkEval(num_labels),
inputs=inputs,
labels=labels,
device=args.device)
if args.init_from_checkpoint: if args.init_from_checkpoint:
model.load(args.init_from_checkpoint) model.load(args.init_from_checkpoint)
......
...@@ -21,11 +21,11 @@ import paddle ...@@ -21,11 +21,11 @@ import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.io import DataLoader from paddle.io import DataLoader
from paddle.fluid.layers.utils import flatten from paddle.fluid.layers.utils import flatten
from paddle.static import InputSpec as Input
from utils.configure import PDConfig from utils.configure import PDConfig
from utils.check import check_gpu, check_version from utils.check import check_gpu, check_version
from paddle.incubate.hapi.model import Input, set_device
from reader import prepare_infer_input, Seq2SeqDataset, Seq2SeqBatchSampler from reader import prepare_infer_input, Seq2SeqDataset, Seq2SeqBatchSampler
from transformer import InferTransformer from transformer import InferTransformer
...@@ -48,7 +48,7 @@ def post_process_seq(seq, bos_idx, eos_idx, output_bos=False, ...@@ -48,7 +48,7 @@ def post_process_seq(seq, bos_idx, eos_idx, output_bos=False,
def do_predict(args): def do_predict(args):
device = set_device("gpu" if args.use_cuda else "cpu") device = paddle.set_device("gpu" if args.use_cuda else "cpu")
fluid.enable_dygraph(device) if args.eager_run else None fluid.enable_dygraph(device) if args.eager_run else None
inputs = [ inputs = [
...@@ -99,7 +99,8 @@ def do_predict(args): ...@@ -99,7 +99,8 @@ def do_predict(args):
return_list=True) return_list=True)
# define model # define model
transformer = InferTransformer( model = paddle.Model(
InferTransformer(
args.src_vocab_size, args.src_vocab_size,
args.trg_vocab_size, args.trg_vocab_size,
args.max_length + 1, args.max_length + 1,
...@@ -118,18 +119,19 @@ def do_predict(args): ...@@ -118,18 +119,19 @@ def do_predict(args):
args.bos_idx, args.bos_idx,
args.eos_idx, args.eos_idx,
beam_size=args.beam_size, beam_size=args.beam_size,
max_out_len=args.max_out_len) max_out_len=args.max_out_len),
transformer.prepare(inputs=inputs, device=device) inputs)
model.prepare()
# load the trained model # load the trained model
assert args.init_from_params, ( assert args.init_from_params, (
"Please set init_from_params to load the infer model.") "Please set init_from_params to load the infer model.")
transformer.load(args.init_from_params) model.load(args.init_from_params)
# TODO: use model.predict when support variant length # TODO: use model.predict when support variant length
f = open(args.output_file, "wb") f = open(args.output_file, "wb")
for data in data_loader(): for data in data_loader():
finished_seq = transformer.test_batch(inputs=flatten(data))[0] finished_seq = model.test_batch(inputs=flatten(data))[0]
finished_seq = np.transpose(finished_seq, [0, 2, 1]) finished_seq = np.transpose(finished_seq, [0, 2, 1])
for ins in finished_seq: for ins in finished_seq:
for beam_idx, beam in enumerate(ins): for beam_idx, beam in enumerate(ins):
......
...@@ -19,17 +19,16 @@ import numpy as np ...@@ -19,17 +19,16 @@ import numpy as np
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.io import DataLoader from paddle.io import DataLoader
from paddle.static import InputSpec as Input
from utils.configure import PDConfig from utils.configure import PDConfig
from utils.check import check_gpu, check_version from utils.check import check_gpu, check_version
from paddle.incubate.hapi.model import Input, set_device
from paddle.incubate.hapi.callbacks import ProgBarLogger
from reader import create_data_loader from reader import create_data_loader
from transformer import Transformer, CrossEntropyCriterion from transformer import Transformer, CrossEntropyCriterion
class TrainCallback(ProgBarLogger): class TrainCallback(paddle.callbacks.ProgBarLogger):
def __init__(self, def __init__(self,
args, args,
verbose=2, verbose=2,
...@@ -75,7 +74,7 @@ class TrainCallback(ProgBarLogger): ...@@ -75,7 +74,7 @@ class TrainCallback(ProgBarLogger):
def do_train(args): def do_train(args):
device = set_device("gpu" if args.use_cuda else "cpu") device = paddle.set_device("gpu" if args.use_cuda else "cpu")
fluid.enable_dygraph(device) if args.eager_run else None fluid.enable_dygraph(device) if args.eager_run else None
# set seed for CE # set seed for CE
...@@ -119,14 +118,16 @@ def do_train(args): ...@@ -119,14 +118,16 @@ def do_train(args):
eval_loader, eval_steps_fn) = create_data_loader(args, device) eval_loader, eval_steps_fn) = create_data_loader(args, device)
# define model # define model
transformer = Transformer( model = paddle.Model(
args.src_vocab_size, args.trg_vocab_size, args.max_length + 1, Transformer(args.src_vocab_size, args.trg_vocab_size,
args.n_layer, args.n_head, args.d_key, args.d_value, args.d_model, args.max_length + 1, args.n_layer, args.n_head, args.d_key,
args.d_inner_hid, args.prepostprocess_dropout, args.attention_dropout, args.d_value, args.d_model, args.d_inner_hid,
args.relu_dropout, args.preprocess_cmd, args.postprocess_cmd, args.prepostprocess_dropout, args.attention_dropout,
args.weight_sharing, args.bos_idx, args.eos_idx) args.relu_dropout, args.preprocess_cmd,
args.postprocess_cmd, args.weight_sharing, args.bos_idx,
transformer.prepare( args.eos_idx), inputs, labels)
model.prepare(
fluid.optimizer.Adam( fluid.optimizer.Adam(
learning_rate=fluid.layers.noam_decay( learning_rate=fluid.layers.noam_decay(
args.d_model, args.d_model,
...@@ -135,21 +136,18 @@ def do_train(args): ...@@ -135,21 +136,18 @@ def do_train(args):
beta1=args.beta1, beta1=args.beta1,
beta2=args.beta2, beta2=args.beta2,
epsilon=float(args.eps), epsilon=float(args.eps),
parameter_list=transformer.parameters()), parameter_list=model.parameters()),
CrossEntropyCriterion(args.label_smooth_eps), CrossEntropyCriterion(args.label_smooth_eps))
inputs=inputs,
labels=labels,
device=device)
## init from some checkpoint, to resume the previous training ## init from some checkpoint, to resume the previous training
if args.init_from_checkpoint: if args.init_from_checkpoint:
transformer.load(args.init_from_checkpoint) model.load(args.init_from_checkpoint)
## init from some pretrain models, to better solve the current task ## init from some pretrain models, to better solve the current task
if args.init_from_pretrain_model: if args.init_from_pretrain_model:
transformer.load(args.init_from_pretrain_model, reset_optimizer=True) model.load(args.init_from_pretrain_model, reset_optimizer=True)
# model train # model train
transformer.fit(train_data=train_loader, model.fit(train_data=train_loader,
eval_data=eval_loader, eval_data=eval_loader,
epochs=args.epoch, epochs=args.epoch,
eval_freq=1, eval_freq=1,
......
...@@ -19,9 +19,7 @@ import numpy as np ...@@ -19,9 +19,7 @@ import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.fluid.layers as layers import paddle.fluid.layers as layers
from paddle.fluid.dygraph import Embedding, LayerNorm, Linear, Layer from paddle.fluid.dygraph import Embedding, LayerNorm, Linear, Layer
from paddle.incubate.hapi.model import Model from paddle.text import TransformerBeamSearchDecoder, DynamicDecode
from paddle.incubate.hapi.loss import Loss
from paddle.incubate.hapi.text import TransformerBeamSearchDecoder, DynamicDecode
def position_encoding_init(n_position, d_pos_vec): def position_encoding_init(n_position, d_pos_vec):
...@@ -498,13 +496,12 @@ class WrapDecoder(Layer): ...@@ -498,13 +496,12 @@ class WrapDecoder(Layer):
return logits return logits
class CrossEntropyCriterion(Loss): class CrossEntropyCriterion(Layer):
def __init__(self, label_smooth_eps): def __init__(self, label_smooth_eps):
super(CrossEntropyCriterion, self).__init__() super(CrossEntropyCriterion, self).__init__()
self.label_smooth_eps = label_smooth_eps self.label_smooth_eps = label_smooth_eps
def forward(self, outputs, labels): def forward(self, predict, label, weights):
predict, (label, weights) = outputs[0], labels
if self.label_smooth_eps: if self.label_smooth_eps:
label = layers.label_smooth( label = layers.label_smooth(
label=layers.one_hot( label=layers.one_hot(
...@@ -523,7 +520,7 @@ class CrossEntropyCriterion(Loss): ...@@ -523,7 +520,7 @@ class CrossEntropyCriterion(Loss):
return avg_cost return avg_cost
class Transformer(Model): class Transformer(Layer):
""" """
model model
""" """
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册