Unverified commit 588d8dc4, authored by bbking, committed by GitHub

update PaddleNLP emotion_detection and ernie for Release/1.6 (#3608)

* emotion-detection => 1.6

* ERNIE => 1.6

* [PaddleNLP] update emotion_detection readme
Parent 0a106a5d
......@@ -33,7 +33,7 @@
1. Install PaddlePaddle
This project requires PaddlePaddle Fluid 1.3.2 or later; please follow the [installation guide](http://www.paddlepaddle.org/#quick-start) to install it
This project requires PaddlePaddle Fluid 1.6 or later; please follow the [installation guide](http://www.paddlepaddle.org/#quick-start) to install it
2. Install the code
......@@ -46,7 +46,7 @@
3. Environment dependencies
Please refer to the PaddlePaddle [installation notes](https://www.paddlepaddle.org.cn/documentation/docs/zh/1.5/beginners_guide/install/index_cn.html) for details
Python 2 requires version 2.7.15+ and Python 3 requires 3.5.1+/3.6/3.7; for all other environment requirements, please refer to the PaddlePaddle [installation notes](https://www.paddlepaddle.org.cn/documentation/docs/zh/1.5/beginners_guide/install/index_cn.html)
### Code structure
......
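A quick way to confirm that the installed PaddlePaddle meets the new 1.6 requirement is the same `fluid.require_version` check this release wires into `models/model_check.py`; a minimal sketch:

```python
# Minimal sketch: verify the installed PaddlePaddle satisfies the 1.6 requirement
# before running the project.
import sys

import paddle.fluid as fluid

try:
    fluid.require_version('1.6.0')   # raises if the installed version is older
except Exception:
    print("PaddlePaddle >= 1.6 is required, please upgrade before running this project.")
    sys.exit(1)
```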
#!/bin/bash
mkdir -p models
cd models
mkdir -p pretrain_models
cd pretrain_models
# download the pretrained model files into ./models/pretrain_models
MODEL_CNN=https://baidu-nlp.bj.bcebos.com/emotion_detection_textcnn-1.0.0.tar.gz
......
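For reference, a Python 3 sketch equivalent to the download step above; only the URL comes from the script, the target layout under `models/pretrain_models` mirrors the script's `mkdir`/`cd` calls, and everything else is an assumption:

```python
# Sketch (Python 3): download and unpack the TextCNN pretrained model, mirroring
# what download_model.sh does.
import os
import tarfile
import urllib.request

MODEL_CNN = "https://baidu-nlp.bj.bcebos.com/emotion_detection_textcnn-1.0.0.tar.gz"
target_dir = os.path.join("models", "pretrain_models")
os.makedirs(target_dir, exist_ok=True)

archive = os.path.join(target_dir, os.path.basename(MODEL_CNN))
urllib.request.urlretrieve(MODEL_CNN, archive)   # fetch the tarball
with tarfile.open(archive) as tar:
    tar.extractall(target_dir)                   # unpack next to the archive
```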
......@@ -44,9 +44,8 @@ def do_save_inference_model(args):
with fluid.program_guard(test_prog, startup_prog):
with fluid.unique_name.guard():
infer_pyreader, probs, feed_target_names = create_model(
infer_loader, probs, feed_target_names = create_model(
args,
pyreader_name='infer_reader',
num_labels=args.num_labels,
is_prediction=True)
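The net effect of this hunk is that `create_model` no longer takes a `pyreader_name`; the rest of the export path is unchanged. As a standalone illustration of that export step, a toy sketch of `fluid.io.save_inference_model` (the graph below is a stand-in, not the project's model):

```python
# Toy sketch of fluid.io.save_inference_model; the real code exports [probs] together
# with the feed_target_names returned by the loader-based create_model.
import paddle.fluid as fluid

main_prog, startup_prog = fluid.Program(), fluid.Program()
with fluid.program_guard(main_prog, startup_prog):
    words = fluid.data(name="words", shape=[-1, 128], dtype="int64")
    emb = fluid.embedding(input=words, size=[1000, 64])   # toy vocab/embedding sizes
    probs = fluid.layers.fc(input=fluid.layers.reduce_mean(emb, dim=1),
                            size=3, act="softmax")

exe = fluid.Executor(fluid.CPUPlace())
exe.run(startup_prog)
fluid.io.save_inference_model("inference_model_toy",      # hypothetical output directory
                              [words.name], [probs], exe, main_program=main_prog)
```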
......@@ -79,20 +78,7 @@ def test_inference_model(args, texts):
dev_count = int(os.environ.get('CPU_NUM', 1))
place = fluid.CPUPlace()
test_prog = fluid.default_main_program()
startup_prog = fluid.default_startup_program()
with fluid.program_guard(test_prog, startup_prog):
with fluid.unique_name.guard():
infer_pyreader, probs, feed_target_names = create_model(
args,
pyreader_name='infer_reader',
num_labels=args.num_labels,
is_prediction=True)
test_prog = test_prog.clone(for_test=True)
exe = fluid.Executor(place)
exe.run(startup_prog)
assert (args.inference_model_dir)
infer_program, feed_names, fetch_targets = fluid.io.load_inference_model(
......@@ -107,9 +93,8 @@ def test_inference_model(args, texts):
wids, seq_len = utils.pad_wid(wids)
data.append(wids)
seq_lens.append(seq_len)
batch_size = len(data)
data = np.array(data).reshape((batch_size, 128, 1))
seq_lens = np.array(seq_lens).reshape((batch_size, 1))
data = np.array(data)
seq_lens = np.array(seq_lens)
pred = exe.run(infer_program,
feed={
......
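With the explicit reshape removed, the padded word ids are fed as a plain `[batch, max_seq_len]` array and the lengths as a 1-D array. A hedged sketch of the load-and-run path; the model directory and the zero-filled batch are placeholders:

```python
# Sketch: load a saved inference model and feed already-padded batches.
import numpy as np
import paddle.fluid as fluid

exe = fluid.Executor(fluid.CPUPlace())
infer_program, feed_names, fetch_targets = fluid.io.load_inference_model(
    "inference_model_toy", exe)                 # placeholder for args.inference_model_dir

data = np.zeros((2, 128), dtype="int64")        # two samples padded to max_seq_len=128
seq_lens = np.array([5, 9], dtype="int64")      # their lengths before padding

feed = dict(zip(feed_names, (data, seq_lens)))  # pair arrays with the exported feed names
pred = exe.run(infer_program, feed=feed, fetch_list=fetch_targets, return_numpy=True)
print(pred[0])                                  # per-class probabilities
```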
......@@ -96,16 +96,16 @@ class EmoTectProcessor(object):
Generate data for train, dev or test
"""
if phase == "train":
return paddle.batch(
return fluid.io.batch(
self.get_train_examples(self.data_dir, epoch, self.max_seq_len), batch_size)
elif phase == "dev":
return paddle.batch(
return fluid.io.batch(
self.get_dev_examples(self.data_dir, epoch, self.max_seq_len), batch_size)
elif phase == "test":
return paddle.batch(
return fluid.io.batch(
self.get_test_examples(self.data_dir, epoch, self.max_seq_len), batch_size)
elif phase == "infer":
return paddle.batch(
return fluid.io.batch(
self.get_infer_examples(self.data_dir, epoch, self.max_seq_len), batch_size)
else:
raise ValueError(
......
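`fluid.io.batch` is a drop-in replacement for `paddle.batch`: both wrap a per-sample reader into a reader that yields lists of `batch_size` samples. A self-contained sketch with toy samples:

```python
# Sketch: fluid.io.batch groups samples from a reader into lists of batch_size items.
import paddle.fluid as fluid

def sample_reader():
    # Each sample mirrors the processor output, (word_ids, label, seq_len); toy values.
    for i in range(10):
        yield [i, i + 1, i + 2], i % 3, 3

batched_reader = fluid.io.batch(sample_reader, batch_size=4)
for batch in batched_reader():
    print(len(batch), batch[0])   # at most 4 samples per batch
```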
......@@ -32,30 +32,30 @@ import numpy as np
from models.classification import nets
from models.model_check import check_cuda
from models.model_check import check_version
from config import PDConfig
import reader
import utils
def create_model(args,
pyreader_name,
num_labels,
is_prediction=False):
"""
Create Model for Emotion Detection
"""
data = fluid.layers.data(name="words", shape=[-1, args.max_seq_len, 1], dtype="int64")
label = fluid.layers.data(name="label", shape=[-1, 1], dtype="int64")
seq_len = fluid.layers.data(name="seq_len", shape=[-1, 1], dtype="int64")
data = fluid.data(name="words", shape=[-1, args.max_seq_len], dtype="int64")
label = fluid.data(name="label", shape=[-1, 1], dtype="int64")
seq_len = fluid.data(name="seq_len", shape=[-1], dtype="int64")
if is_prediction:
pyreader = fluid.io.PyReader(
loader = fluid.io.DataLoader.from_generator(
feed_list=[data, seq_len],
capacity=16,
iterable=False,
return_list=False)
else:
pyreader = fluid.io.PyReader(
loader = fluid.io.DataLoader.from_generator(
feed_list=[data, label, seq_len],
capacity=16,
iterable=False,
......@@ -78,19 +78,19 @@ def create_model(args,
if is_prediction:
probs = network(data, seq_len, None, args.vocab_size, class_dim=num_labels, is_prediction=True)
return pyreader, probs, [data.name, seq_len.name]
return loader, probs, [data.name, seq_len.name]
avg_loss, probs = network(data, seq_len, label, args.vocab_size, class_dim=num_labels)
num_seqs = fluid.layers.create_tensor(dtype='int64')
accuracy = fluid.layers.accuracy(input=probs, label=label, total=num_seqs)
return pyreader, avg_loss, accuracy, num_seqs
return loader, avg_loss, accuracy, num_seqs
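The new pattern in `create_model`: the feed variables are declared once with `fluid.data` and handed to a non-iterable `DataLoader`, which is why the `pyreader_name` argument disappears. A minimal sketch with toy shapes:

```python
# Sketch of the loader-based create_model pattern; max_seq_len is a toy value.
import paddle.fluid as fluid

max_seq_len = 16
data = fluid.data(name="words", shape=[-1, max_seq_len], dtype="int64")
label = fluid.data(name="label", shape=[-1, 1], dtype="int64")
seq_len = fluid.data(name="seq_len", shape=[-1], dtype="int64")

loader = fluid.io.DataLoader.from_generator(
    feed_list=[data, label, seq_len],   # the same variables the network consumes
    capacity=16,
    iterable=False,                     # driven with loader.start() / loader.reset()
    return_list=False)
```

Because the feed variables already exist as ordinary graph variables, `fluid.layers.read_file` is no longer needed to unpack them.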
def evaluate(exe, test_program, test_pyreader, fetch_list, eval_phase):
def evaluate(exe, test_program, test_loader, fetch_list, eval_phase):
"""
Evaluation Function
"""
test_pyreader.start()
test_loader.start()
total_cost, total_acc, total_num_seqs = [], [], []
time_begin = time.time()
while True:
......@@ -105,7 +105,7 @@ def evaluate(exe, test_program, test_pyreader, fetch_list, eval_phase):
total_acc.extend(np_acc * np_num_seqs)
total_num_seqs.extend(np_num_seqs)
except fluid.core.EOFException:
test_pyreader.reset()
test_loader.reset()
break
time_end = time.time()
print("[%s evaluation] avg loss: %f, avg acc: %f, elapsed time: %f s" %
......@@ -113,8 +113,8 @@ def evaluate(exe, test_program, test_pyreader, fetch_list, eval_phase):
np.sum(total_acc) / np.sum(total_num_seqs), time_end - time_begin))
def infer(exe, infer_program, infer_pyreader, fetch_list, infer_phase):
infer_pyreader.start()
def infer(exe, infer_program, infer_loader, fetch_list, infer_phase):
infer_loader.start()
time_begin = time.time()
while True:
try:
......@@ -125,7 +125,7 @@ def infer(exe, infer_program, infer_pyreader, fetch_list, infer_phase):
print("%d\t%f\t%f\t%f" %
(np.argmax(probs), probs[0], probs[1], probs[2]))
except fluid.core.EOFException as e:
infer_pyreader.reset()
infer_loader.reset()
break
time_end = time.time()
print("[%s] elapsed time: %f s" % (infer_phase, time_end - time_begin))
......@@ -172,9 +172,8 @@ def main(args):
with fluid.program_guard(train_program, startup_prog):
with fluid.unique_name.guard():
train_pyreader, loss, accuracy, num_seqs = create_model(
train_loader, loss, accuracy, num_seqs = create_model(
args,
pyreader_name='train_reader',
num_labels=num_labels,
is_prediction=False)
......@@ -202,9 +201,8 @@ def main(args):
test_prog = fluid.Program()
with fluid.program_guard(test_prog, startup_prog):
with fluid.unique_name.guard():
test_pyreader, loss, accuracy, num_seqs = create_model(
test_loader, loss, accuracy, num_seqs = create_model(
args,
pyreader_name='test_reader',
num_labels=num_labels,
is_prediction=False)
test_prog = test_prog.clone(for_test=True)
......@@ -218,9 +216,8 @@ def main(args):
test_prog = fluid.Program()
with fluid.program_guard(test_prog, startup_prog):
with fluid.unique_name.guard():
infer_pyreader, probs, _ = create_model(
infer_loader, probs, _ = create_model(
args,
pyreader_name='infer_reader',
num_labels=num_labels,
is_prediction=True)
test_prog = test_prog.clone(for_test=True)
......@@ -239,18 +236,18 @@ def main(args):
if args.do_train:
train_exe = exe
train_pyreader.decorate_sample_list_generator(train_data_generator)
train_loader.set_sample_list_generator(train_data_generator)
else:
train_exe = None
if args.do_val:
test_exe = exe
test_pyreader.decorate_sample_list_generator(test_data_generator)
test_loader.set_sample_list_generator(test_data_generator)
if args.do_infer:
test_exe = exe
infer_pyreader.decorate_sample_list_generator(infer_data_generator)
infer_loader.set_sample_list_generator(infer_data_generator)
if args.do_train:
train_pyreader.start()
train_loader.start()
steps = 0
total_cost, total_acc, total_num_seqs = [], [], []
time_begin = time.time()
......@@ -276,7 +273,7 @@ def main(args):
total_num_seqs.extend(np_num_seqs)
if args.verbose:
verbose = "train pyreader queue size: %d, " % train_pyreader.queue.size(
verbose = "train loader queue size: %d, " % train_loader.queue.size(
)
print(verbose)
......@@ -301,20 +298,20 @@ def main(args):
if steps % args.validation_steps == 0:
# evaluate on dev set
if args.do_val:
evaluate(test_exe, test_prog, test_pyreader,
evaluate(test_exe, test_prog, test_loader,
[loss.name, accuracy.name, num_seqs.name],
"dev")
except fluid.core.EOFException:
print("final step: %d " % steps)
if args.do_val:
evaluate(test_exe, test_prog, test_pyreader,
evaluate(test_exe, test_prog, test_loader,
[loss.name, accuracy.name, num_seqs.name],
"dev")
save_path = os.path.join(args.save_checkpoint_dir, "step_" + str(steps))
fluid.io.save_persistables(exe, save_path, train_program)
train_pyreader.reset()
train_loader.reset()
break
if args.do_train and args.enable_ce:
......@@ -336,14 +333,14 @@ def main(args):
# evaluate on test set
if not args.do_train and args.do_val:
print("Final test result:")
evaluate(test_exe, test_prog, test_pyreader,
evaluate(test_exe, test_prog, test_loader,
[loss.name, accuracy.name, num_seqs.name],
"test")
# infer
if args.do_infer:
print("Final infer result:")
infer(test_exe, test_prog, infer_pyreader,
infer(test_exe, test_prog, infer_loader,
[probs.name],
"infer")
......@@ -361,4 +358,5 @@ if __name__ == "__main__":
args.build()
args.print_arguments()
check_cuda(args.use_cuda)
check_version()
main(args)
......@@ -305,7 +305,7 @@ def main(args):
if args.do_train:
train_exe = exe
train_pyreader.decorate_tensor_provider(train_data_generator)
train_pyreader.set_batch_generator(train_data_generator)
else:
train_exe = None
if args.do_val or args.do_infer:
......@@ -355,7 +355,7 @@ def main(args):
if steps % args.validation_steps == 0:
# evaluate dev set
if args.do_val:
test_pyreader.decorate_tensor_provider(
test_pyreader.set_batch_generator(
reader.data_generator(
input_file=args.dev_set,
batch_size=args.batch_size,
......@@ -375,7 +375,7 @@ def main(args):
# eval on test set
if not args.do_train and args.do_val:
test_pyreader.decorate_tensor_provider(
test_pyreader.set_batch_generator(
reader.data_generator(
input_file=args.test_set,
batch_size=args.batch_size, phase='test', epoch=1,
......@@ -386,7 +386,7 @@ def main(args):
# infer on infer set
if args.do_infer:
infer_pyreader.decorate_tensor_provider(
infer_pyreader.set_batch_generator(
reader.data_generator(
input_file=args.infer_set,
batch_size=args.batch_size,
......
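In the ERNIE runner `reader.data_generator` already yields complete batches, so the loader is fed through `set_batch_generator` (the renamed `decorate_tensor_provider`) rather than `set_sample_list_generator`. A toy sketch of the difference; shapes and data are placeholders:

```python
# Sketch: set_batch_generator expects a callable yielding whole batches (one numpy
# array per feed variable), while set_sample_list_generator expects per-sample records.
import numpy as np
import paddle.fluid as fluid

src_ids = fluid.data(name="src_ids", shape=[-1, 8, 1], dtype="int64")
labels = fluid.data(name="labels", shape=[-1, 1], dtype="int64")
loader = fluid.io.DataLoader.from_generator(
    feed_list=[src_ids, labels], capacity=8, iterable=False)

def toy_batches():
    for _ in range(4):
        yield (np.zeros((2, 8, 1), dtype="int64"),   # a batch of two sequences
               np.zeros((2, 1), dtype="int64"))      # and their labels

loader.set_batch_generator(toy_batches)
```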
......@@ -17,7 +17,7 @@ def bow_net(data,
Bow net
"""
# embedding layer
emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim])
emb = fluid.embedding(input=data, size=[dict_dim, emb_dim])
emb = fluid.layers.sequence_unpad(emb, length=seq_len)
# bow layer
bow = fluid.layers.sequence_pool(input=emb, pool_type='sum')
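With the padded 2-D int64 input declared by `fluid.data`, `fluid.embedding` returns a dense `[batch, max_seq_len, emb_dim]` tensor, and `sequence_unpad` converts it back to a variable-length LoDTensor for the sequence ops. A standalone sketch of this bow path with toy sizes:

```python
# Sketch of the padded-input path: fluid.embedding -> sequence_unpad -> sequence_pool.
import paddle.fluid as fluid

dict_dim, emb_dim, max_seq_len = 1000, 128, 32                 # toy sizes
data = fluid.data(name="words", shape=[-1, max_seq_len], dtype="int64")
seq_len = fluid.data(name="seq_len", shape=[-1], dtype="int64")

emb = fluid.embedding(input=data, size=[dict_dim, emb_dim])    # dense [-1, 32, 128]
emb = fluid.layers.sequence_unpad(emb, length=seq_len)         # drop the padding
bow = fluid.layers.sequence_pool(input=emb, pool_type='sum')   # one vector per sequence
```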
......@@ -50,7 +50,7 @@ def cnn_net(data,
Conv net
"""
# embedding layer
emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim])
emb = fluid.embedding(input=data, size=[dict_dim, emb_dim])
emb = fluid.layers.sequence_unpad(emb, length=seq_len)
# convolution layer
conv_3 = fluid.nets.sequence_conv_pool(
......@@ -87,7 +87,7 @@ def lstm_net(data,
Lstm net
"""
# embedding layer
emb = fluid.layers.embedding(
emb = fluid.embedding(
input=data,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(learning_rate=emb_lr))
......@@ -129,7 +129,7 @@ def bilstm_net(data,
Bi-Lstm net
"""
# embedding layer
emb = fluid.layers.embedding(
emb = fluid.embedding(
input=data,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(learning_rate=emb_lr))
......@@ -175,7 +175,7 @@ def gru_net(data,
"""
gru net
"""
emb = fluid.layers.embedding(
emb = fluid.embedding(
input=data,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(learning_rate=emb_lr))
......@@ -216,7 +216,7 @@ def textcnn_net(data,
win_sizes = [1, 2, 3]
# embedding layer
emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim])
emb = fluid.embedding(input=data, size=[dict_dim, emb_dim])
emb = fluid.layers.sequence_unpad(emb, length=seq_len)
# convolution layer
convs = []
......
......@@ -29,6 +29,21 @@ def check_cuda(use_cuda, err = \
except Exception as e:
pass
def check_version():
"""
Log an error and exit when the installed version of PaddlePaddle
does not satisfy the requirement.
"""
err = "PaddlePaddle version 1.6 or higher is required, " \
"or a suitable develop version. \n" \
"Please make sure your installed version matches the code."
try:
fluid.require_version('1.6.0')
except Exception as e:
print(err)
sys.exit(1)
if __name__ == "__main__":
check_cuda(True)
......
......@@ -30,19 +30,19 @@ from models.transformer_encoder import encoder, pre_process_layer
def ernie_pyreader(args, pyreader_name):
"""define standard ernie pyreader"""
pyreader = fluid.layers.py_reader(
src_ids = fluid.data(name='1', shape=[-1, args.max_seq_len, 1], dtype='int64')
sent_ids = fluid.data(name='2', shape=[-1, args.max_seq_len, 1], dtype='int64')
pos_ids = fluid.data(name='3', shape=[-1, args.max_seq_len, 1], dtype='int64')
input_mask = fluid.data(name='4', shape=[-1, args.max_seq_len, 1], dtype='float32')
labels = fluid.data(name='5', shape=[-1, 1], dtype='int64')
seq_lens = fluid.data(name='6', shape=[-1], dtype='int64')
pyreader = fluid.io.DataLoader.from_generator(
feed_list=[src_ids, sent_ids, pos_ids, input_mask, labels, seq_lens],
capacity=50,
shapes=[[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1], [-1, 1],
[-1, 1]],
dtypes=['int64', 'int64', 'int64', 'float32', 'int64', 'int64'],
lod_levels=[0, 0, 0, 0, 0, 0],
name=pyreader_name,
iterable=False,
use_double_buffer=True)
(src_ids, sent_ids, pos_ids, input_mask, labels,
seq_lens) = fluid.layers.read_file(pyreader)
ernie_inputs = {
"src_ids": src_ids,
"sent_ids": sent_ids,
......
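Since the six `fluid.data` variables are ordinary graph variables, the old `fluid.layers.read_file` unpacking step is gone and `run_classifier.py` only attaches its batch reader to the returned loader. A self-contained toy sketch; shapes and the generator are placeholders for the real ERNIE reader output:

```python
# Sketch: an ernie_pyreader-style DataLoader fed with a batch generator.
import numpy as np
import paddle.fluid as fluid

max_seq_len = 8
feed_vars = [
    fluid.data(name='1', shape=[-1, max_seq_len, 1], dtype='int64'),    # src_ids
    fluid.data(name='2', shape=[-1, max_seq_len, 1], dtype='int64'),    # sent_ids
    fluid.data(name='3', shape=[-1, max_seq_len, 1], dtype='int64'),    # pos_ids
    fluid.data(name='4', shape=[-1, max_seq_len, 1], dtype='float32'),  # input_mask
    fluid.data(name='5', shape=[-1, 1], dtype='int64'),                 # labels
    fluid.data(name='6', shape=[-1], dtype='int64'),                    # seq_lens
]
loader = fluid.io.DataLoader.from_generator(
    feed_list=feed_vars, capacity=50, iterable=False, use_double_buffer=True)

def toy_batches(batch=2):
    for _ in range(3):
        yield (np.zeros((batch, max_seq_len, 1), dtype='int64'),
               np.zeros((batch, max_seq_len, 1), dtype='int64'),
               np.zeros((batch, max_seq_len, 1), dtype='int64'),
               np.ones((batch, max_seq_len, 1), dtype='float32'),
               np.zeros((batch, 1), dtype='int64'),
               np.full((batch,), max_seq_len, dtype='int64'))

loader.set_batch_generator(toy_batches)   # run_classifier.py plugs in reader.data_generator here
```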