Unverified commit 588d8dc4, authored by bbking, committed by GitHub

update PaddleNLP emotion_detection and ernie for Release/1.6 (#3608)

* emotion-detection => 1.6

* ERNIE => 1.6

* [PaddleNLP] update emotion_detection readme
Parent 0a106a5d
@@ -33,7 +33,7 @@
 1. PaddlePaddle installation
-   This project requires PaddlePaddle Fluid 1.3.2 or later; please follow the [installation guide](http://www.paddlepaddle.org/#quick-start) to install it.
+   This project requires PaddlePaddle Fluid 1.6 or later; please follow the [installation guide](http://www.paddlepaddle.org/#quick-start) to install it.
 2. Code installation
@@ -46,7 +46,7 @@
 3. Environment dependencies
-   Please refer to the PaddlePaddle [installation notes](https://www.paddlepaddle.org.cn/documentation/docs/zh/1.5/beginners_guide/install/index_cn.html) for details.
+   Python 2 requires version 2.7.15+ and Python 3 requires 3.5.1+/3.6/3.7; for any other environment questions, refer to the PaddlePaddle [installation notes](https://www.paddlepaddle.org.cn/documentation/docs/zh/1.5/beginners_guide/install/index_cn.html).
 ### Code structure
...
 #!/bin/bash
-mkdir -p models
-cd models
+mkdir -p pretrain_models
+cd pretrain_models
 # download pretrain model file to ./models/
 MODEL_CNN=https://baidu-nlp.bj.bcebos.com/emotion_detection_textcnn-1.0.0.tar.gz
...
@@ -44,9 +44,8 @@ def do_save_inference_model(args):
     with fluid.program_guard(test_prog, startup_prog):
         with fluid.unique_name.guard():
-            infer_pyreader, probs, feed_target_names = create_model(
+            infer_loader, probs, feed_target_names = create_model(
                 args,
-                pyreader_name='infer_reader',
                 num_labels=args.num_labels,
                 is_prediction=True)
@@ -79,20 +78,7 @@ def test_inference_model(args, texts):
     dev_count = int(os.environ.get('CPU_NUM', 1))
     place = fluid.CPUPlace()
-    test_prog = fluid.default_main_program()
-    startup_prog = fluid.default_startup_program()
-    with fluid.program_guard(test_prog, startup_prog):
-        with fluid.unique_name.guard():
-            infer_pyreader, probs, feed_target_names = create_model(
-                args,
-                pyreader_name='infer_reader',
-                num_labels=args.num_labels,
-                is_prediction=True)
-    test_prog = test_prog.clone(for_test=True)
     exe = fluid.Executor(place)
-    exe.run(startup_prog)
     assert (args.inference_model_dir)
     infer_program, feed_names, fetch_targets = fluid.io.load_inference_model(
@@ -107,9 +93,8 @@ def test_inference_model(args, texts):
         wids, seq_len = utils.pad_wid(wids)
         data.append(wids)
         seq_lens.append(seq_len)
-    batch_size = len(data)
-    data = np.array(data).reshape((batch_size, 128, 1))
-    seq_lens = np.array(seq_lens).reshape((batch_size, 1))
+    data = np.array(data)
+    seq_lens = np.array(seq_lens)
     pred = exe.run(infer_program,
                    feed={
...
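The dropped reshape calls above work because the updated create_model declares its inputs with fluid.data, which accepts a plain 2-D batch of word ids and a 1-D vector of lengths. A minimal hedged sketch of the feed this implies, with toy values invented for illustration (only exe, infer_program, feed_names, and fetch_targets come from the code above):

    import numpy as np

    # Toy, hypothetical batch: two zero-padded samples of max_seq_len = 128.
    max_seq_len = 128
    wids = [[3, 7, 12] + [0] * 125,
            [5, 9] + [0] * 126]
    lens = [3, 2]

    data = np.array(wids, dtype="int64")       # shape (2, 128), no trailing 1-dim
    seq_lens = np.array(lens, dtype="int64")   # shape (2,)

    # Fed into the loaded inference program roughly as:
    # pred = exe.run(infer_program,
    #                feed={feed_names[0]: data, feed_names[1]: seq_lens},
    #                fetch_list=fetch_targets)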
@@ -96,16 +96,16 @@ class EmoTectProcessor(object):
         Generate data for train, dev or test
         """
         if phase == "train":
-            return paddle.batch(
+            return fluid.io.batch(
                 self.get_train_examples(self.data_dir, epoch, self.max_seq_len), batch_size)
         elif phase == "dev":
-            return paddle.batch(
+            return fluid.io.batch(
                 self.get_dev_examples(self.data_dir, epoch, self.max_seq_len), batch_size)
         elif phase == "test":
-            return paddle.batch(
+            return fluid.io.batch(
                 self.get_test_examples(self.data_dir, epoch, self.max_seq_len), batch_size)
         elif phase == "infer":
-            return paddle.batch(
+            return fluid.io.batch(
                 self.get_infer_examples(self.data_dir, epoch, self.max_seq_len), batch_size)
         else:
             raise ValueError(
...
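fluid.io.batch is the 1.6 home of the old paddle.batch helper: it wraps a sample-level reader into a reader that yields lists of batch_size samples. A small self-contained sketch with made-up data (sample_reader and its toy tuples are hypothetical):

    import paddle.fluid as fluid

    # Toy sample reader: yields (word id list, label) pairs.
    def sample_reader():
        for i in range(10):
            yield [i, i + 1, i + 2], i % 3

    batch_reader = fluid.io.batch(sample_reader, batch_size=4)
    for batch in batch_reader():
        # each batch is a list of up to 4 (word ids, label) samples
        print(batch)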
@@ -32,30 +32,30 @@ import numpy as np
 from models.classification import nets
 from models.model_check import check_cuda
+from models.model_check import check_version
 from config import PDConfig
 import reader
 import utils

 def create_model(args,
-                 pyreader_name,
                  num_labels,
                  is_prediction=False):
     """
     Create Model for Emotion Detection
     """
-    data = fluid.layers.data(name="words", shape=[-1, args.max_seq_len, 1], dtype="int64")
-    label = fluid.layers.data(name="label", shape=[-1, 1], dtype="int64")
-    seq_len = fluid.layers.data(name="seq_len", shape=[-1, 1], dtype="int64")
+    data = fluid.data(name="words", shape=[-1, args.max_seq_len], dtype="int64")
+    label = fluid.data(name="label", shape=[-1, 1], dtype="int64")
+    seq_len = fluid.data(name="seq_len", shape=[-1], dtype="int64")
     if is_prediction:
-        pyreader = fluid.io.PyReader(
+        loader = fluid.io.DataLoader.from_generator(
             feed_list=[data, seq_len],
             capacity=16,
             iterable=False,
             return_list=False)
     else:
-        pyreader = fluid.io.PyReader(
+        loader = fluid.io.DataLoader.from_generator(
             feed_list=[data, label, seq_len],
             capacity=16,
             iterable=False,
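The hunk above swaps fluid.layers.data plus fluid.io.PyReader for fluid.data plus fluid.io.DataLoader.from_generator. A minimal standalone sketch of that pattern, using the same shapes as the diff (max_seq_len is assumed to be 128 here for illustration):

    import paddle.fluid as fluid

    max_seq_len = 128  # assumed stand-in for args.max_seq_len

    data = fluid.data(name="words", shape=[-1, max_seq_len], dtype="int64")
    label = fluid.data(name="label", shape=[-1, 1], dtype="int64")
    seq_len = fluid.data(name="seq_len", shape=[-1], dtype="int64")

    loader = fluid.io.DataLoader.from_generator(
        feed_list=[data, label, seq_len],
        capacity=16,
        iterable=False,
        return_list=False)

    # Later hunks in main() bind the loader to a batched sample generator
    # and drive it explicitly:
    #   loader.set_sample_list_generator(train_data_generator)
    #   loader.start() ... fluid.core.EOFException ... loader.reset()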
@@ -78,19 +78,19 @@ def create_model(args,
     if is_prediction:
         probs = network(data, seq_len, None, args.vocab_size, class_dim=num_labels, is_prediction=True)
-        return pyreader, probs, [data.name, seq_len.name]
+        return loader, probs, [data.name, seq_len.name]
     avg_loss, probs = network(data, seq_len, label, args.vocab_size, class_dim=num_labels)
     num_seqs = fluid.layers.create_tensor(dtype='int64')
     accuracy = fluid.layers.accuracy(input=probs, label=label, total=num_seqs)
-    return pyreader, avg_loss, accuracy, num_seqs
+    return loader, avg_loss, accuracy, num_seqs

-def evaluate(exe, test_program, test_pyreader, fetch_list, eval_phase):
+def evaluate(exe, test_program, test_loader, fetch_list, eval_phase):
     """
     Evaluation Function
     """
-    test_pyreader.start()
+    test_loader.start()
     total_cost, total_acc, total_num_seqs = [], [], []
     time_begin = time.time()
     while True:
@@ -105,7 +105,7 @@ def evaluate(exe, test_program, test_loader, fetch_list, eval_phase):
             total_acc.extend(np_acc * np_num_seqs)
             total_num_seqs.extend(np_num_seqs)
         except fluid.core.EOFException:
-            test_pyreader.reset()
+            test_loader.reset()
             break
     time_end = time.time()
     print("[%s evaluation] avg loss: %f, avg acc: %f, elapsed time: %f s" %
@@ -113,8 +113,8 @@ def evaluate(exe, test_program, test_loader, fetch_list, eval_phase):
           np.sum(total_acc) / np.sum(total_num_seqs), time_end - time_begin))

-def infer(exe, infer_program, infer_pyreader, fetch_list, infer_phase):
-    infer_pyreader.start()
+def infer(exe, infer_program, infer_loader, fetch_list, infer_phase):
+    infer_loader.start()
     time_begin = time.time()
     while True:
         try:
@@ -125,7 +125,7 @@ def infer(exe, infer_program, infer_loader, fetch_list, infer_phase):
             print("%d\t%f\t%f\t%f" %
                   (np.argmax(probs), probs[0], probs[1], probs[2]))
         except fluid.core.EOFException as e:
-            infer_pyreader.reset()
+            infer_loader.reset()
             break
     time_end = time.time()
     print("[%s] elapsed time: %f s" % (infer_phase, time_end - time_begin))
@@ -172,9 +172,8 @@ def main(args):
         with fluid.program_guard(train_program, startup_prog):
             with fluid.unique_name.guard():
-                train_pyreader, loss, accuracy, num_seqs = create_model(
+                train_loader, loss, accuracy, num_seqs = create_model(
                     args,
-                    pyreader_name='train_reader',
                     num_labels=num_labels,
                     is_prediction=False)
@@ -202,9 +201,8 @@ def main(args):
         test_prog = fluid.Program()
         with fluid.program_guard(test_prog, startup_prog):
             with fluid.unique_name.guard():
-                test_pyreader, loss, accuracy, num_seqs = create_model(
+                test_loader, loss, accuracy, num_seqs = create_model(
                     args,
-                    pyreader_name='test_reader',
                     num_labels=num_labels,
                     is_prediction=False)
         test_prog = test_prog.clone(for_test=True)
@@ -218,9 +216,8 @@ def main(args):
         test_prog = fluid.Program()
         with fluid.program_guard(test_prog, startup_prog):
             with fluid.unique_name.guard():
-                infer_pyreader, probs, _ = create_model(
+                infer_loader, probs, _ = create_model(
                     args,
-                    pyreader_name='infer_reader',
                     num_labels=num_labels,
                     is_prediction=True)
         test_prog = test_prog.clone(for_test=True)
@@ -239,18 +236,18 @@ def main(args):
     if args.do_train:
         train_exe = exe
-        train_pyreader.decorate_sample_list_generator(train_data_generator)
+        train_loader.set_sample_list_generator(train_data_generator)
     else:
         train_exe = None
     if args.do_val:
         test_exe = exe
-        test_pyreader.decorate_sample_list_generator(test_data_generator)
+        test_loader.set_sample_list_generator(test_data_generator)
     if args.do_infer:
         test_exe = exe
-        infer_pyreader.decorate_sample_list_generator(infer_data_generator)
+        infer_loader.set_sample_list_generator(infer_data_generator)
     if args.do_train:
-        train_pyreader.start()
+        train_loader.start()
         steps = 0
         total_cost, total_acc, total_num_seqs = [], [], []
         time_begin = time.time()
@@ -276,7 +273,7 @@ def main(args):
                 total_num_seqs.extend(np_num_seqs)
                 if args.verbose:
-                    verbose = "train pyreader queue size: %d, " % train_pyreader.queue.size(
+                    verbose = "train loader queue size: %d, " % train_loader.queue.size(
                     )
                     print(verbose)
@@ -301,20 +298,20 @@ def main(args):
                 if steps % args.validation_steps == 0:
                     # evaluate on dev set
                     if args.do_val:
-                        evaluate(test_exe, test_prog, test_pyreader,
+                        evaluate(test_exe, test_prog, test_loader,
                                  [loss.name, accuracy.name, num_seqs.name],
                                  "dev")
             except fluid.core.EOFException:
                 print("final step: %d " % steps)
                 if args.do_val:
-                    evaluate(test_exe, test_prog, test_pyreader,
+                    evaluate(test_exe, test_prog, test_loader,
                              [loss.name, accuracy.name, num_seqs.name],
                              "dev")
                 save_path = os.path.join(args.save_checkpoint_dir, "step_" + str(steps))
                 fluid.io.save_persistables(exe, save_path, train_program)
-                train_pyreader.reset()
+                train_loader.reset()
                 break
     if args.do_train and args.enable_ce:
@@ -336,14 +333,14 @@ def main(args):
     # evaluate on test set
     if not args.do_train and args.do_val:
         print("Final test result:")
-        evaluate(test_exe, test_prog, test_pyreader,
+        evaluate(test_exe, test_prog, test_loader,
                  [loss.name, accuracy.name, num_seqs.name],
                  "test")
     # infer
     if args.do_infer:
         print("Final infer result:")
-        infer(test_exe, test_prog, infer_pyreader,
+        infer(test_exe, test_prog, infer_loader,
               [probs.name],
               "infer")
@@ -361,4 +358,5 @@ if __name__ == "__main__":
     args.build()
     args.print_arguments()
     check_cuda(args.use_cuda)
+    check_version()
     main(args)
@@ -305,7 +305,7 @@ def main(args):
     if args.do_train:
         train_exe = exe
-        train_pyreader.decorate_tensor_provider(train_data_generator)
+        train_pyreader.set_batch_generator(train_data_generator)
     else:
         train_exe = None
     if args.do_val or args.do_infer:
@@ -355,7 +355,7 @@ def main(args):
             if steps % args.validation_steps == 0:
                 # evaluate dev set
                 if args.do_val:
-                    test_pyreader.decorate_tensor_provider(
+                    test_pyreader.set_batch_generator(
                         reader.data_generator(
                             input_file=args.dev_set,
                             batch_size=args.batch_size,
@@ -375,7 +375,7 @@ def main(args):
     # eval on test set
     if not args.do_train and args.do_val:
-        test_pyreader.decorate_tensor_provider(
+        test_pyreader.set_batch_generator(
             reader.data_generator(
                 input_file=args.test_set,
                 batch_size=args.batch_size, phase='test', epoch=1,
@@ -386,7 +386,7 @@ def main(args):
     # infer on infer set
     if args.do_infer:
-        infer_pyreader.decorate_tensor_provider(
+        infer_pyreader.set_batch_generator(
             reader.data_generator(
                 input_file=args.infer_set,
                 batch_size=args.batch_size,
...
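The ERNIE loaders are bound with set_batch_generator, the renamed decorate_tensor_provider, which expects a generator yielding already batched data, one array per feed variable; this differs from the set_sample_list_generator binding used in the emotion_detection classifier above, which batches per-sample tuples itself. A hedged toy sketch of the batched variant (shapes and data are invented for illustration):

    import numpy as np
    import paddle.fluid as fluid

    words = fluid.data(name="words", shape=[-1, 128], dtype="int64")
    seq_len = fluid.data(name="seq_len", shape=[-1], dtype="int64")
    loader = fluid.io.DataLoader.from_generator(
        feed_list=[words, seq_len], capacity=16, iterable=False)

    def toy_batch_generator():
        for _ in range(4):
            # one already-batched tuple per iteration: (word id batch, length batch)
            yield (np.zeros((8, 128), dtype="int64"),
                   np.full((8,), 128, dtype="int64"))

    loader.set_batch_generator(toy_batch_generator)
    # versus: loader.set_sample_list_generator(reader_yielding_lists_of_samples)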
@@ -17,7 +17,7 @@ def bow_net(data,
     Bow net
     """
     # embedding layer
-    emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim])
+    emb = fluid.embedding(input=data, size=[dict_dim, emb_dim])
     emb = fluid.layers.sequence_unpad(emb, length=seq_len)
     # bow layer
     bow = fluid.layers.sequence_pool(input=emb, pool_type='sum')
@@ -50,7 +50,7 @@ def cnn_net(data,
     Conv net
     """
     # embedding layer
-    emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim])
+    emb = fluid.embedding(input=data, size=[dict_dim, emb_dim])
     emb = fluid.layers.sequence_unpad(emb, length=seq_len)
     # convolution layer
     conv_3 = fluid.nets.sequence_conv_pool(
@@ -87,7 +87,7 @@ def lstm_net(data,
     Lstm net
     """
     # embedding layer
-    emb = fluid.layers.embedding(
+    emb = fluid.embedding(
         input=data,
         size=[dict_dim, emb_dim],
         param_attr=fluid.ParamAttr(learning_rate=emb_lr))
@@ -129,7 +129,7 @@ def bilstm_net(data,
     Bi-Lstm net
     """
     # embedding layer
-    emb = fluid.layers.embedding(
+    emb = fluid.embedding(
         input=data,
         size=[dict_dim, emb_dim],
         param_attr=fluid.ParamAttr(learning_rate=emb_lr))
@@ -175,7 +175,7 @@ def gru_net(data,
     """
     gru net
     """
-    emb = fluid.layers.embedding(
+    emb = fluid.embedding(
         input=data,
         size=[dict_dim, emb_dim],
         param_attr=fluid.ParamAttr(learning_rate=emb_lr))
@@ -216,7 +216,7 @@ def textcnn_net(data,
     win_sizes = [1, 2, 3]
     # embedding layer
-    emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim])
+    emb = fluid.embedding(input=data, size=[dict_dim, emb_dim])
     emb = fluid.layers.sequence_unpad(emb, length=seq_len)
     # convolution layer
     convs = []
...
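Every network in nets.py now calls fluid.embedding, which takes the 2-D int64 id tensor produced by fluid.data directly, whereas fluid.layers.embedding was paired with the old [-1, max_seq_len, 1] input layout. A minimal hedged sketch of the lookup-then-unpad prologue these nets share, with toy dimensions assumed for illustration:

    import paddle.fluid as fluid

    dict_dim, emb_dim, max_seq_len = 10000, 128, 128   # toy sizes

    data = fluid.data(name="words", shape=[-1, max_seq_len], dtype="int64")
    seq_len = fluid.data(name="seq_len", shape=[-1], dtype="int64")

    # dense (batch, max_seq_len, emb_dim) lookup, then strip the padding so
    # the downstream sequence_* ops see variable-length LoD sequences
    emb = fluid.embedding(input=data, size=[dict_dim, emb_dim])
    emb = fluid.layers.sequence_unpad(emb, length=seq_len)
    bow = fluid.layers.sequence_pool(input=emb, pool_type='sum')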
@@ -29,6 +29,21 @@ def check_cuda(use_cuda, err = \
     except Exception as e:
         pass

+def check_version():
+    """
+    Log error and exit when the installed version of paddlepaddle is
+    not satisfied.
+    """
+    err = "PaddlePaddle version 1.6 or higher is required, " \
+          "or a suitable develop version is satisfied as well. \n" \
+          "Please make sure the version is good with your code."
+
+    try:
+        fluid.require_version('1.6.0')
+    except Exception as e:
+        print(err)
+        sys.exit(1)

 if __name__ == "__main__":
     check_cuda(True)
...
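The new helper is called right before main() in run_classifier.py (see the check_version() line added there). A hedged sketch of what it boils down to: fluid.require_version raises when the installed PaddlePaddle is older than the requested version, and the helper converts that into a readable message plus a clean exit.

    import sys
    import paddle.fluid as fluid

    try:
        fluid.require_version('1.6.0')
    except Exception:
        print("PaddlePaddle 1.6 or higher is required for this code.")
        sys.exit(1)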
@@ -30,19 +30,19 @@ from models.transformer_encoder import encoder, pre_process_layer
 def ernie_pyreader(args, pyreader_name):
     """define standard ernie pyreader"""
-    pyreader = fluid.layers.py_reader(
-        capacity=50,
-        shapes=[[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
-                [-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1], [-1, 1],
-                [-1, 1]],
-        dtypes=['int64', 'int64', 'int64', 'float32', 'int64', 'int64'],
-        lod_levels=[0, 0, 0, 0, 0, 0],
-        name=pyreader_name,
-        use_double_buffer=True)
-    (src_ids, sent_ids, pos_ids, input_mask, labels,
-     seq_lens) = fluid.layers.read_file(pyreader)
+    src_ids = fluid.data(name='1', shape=[-1, args.max_seq_len, 1], dtype='int64')
+    sent_ids = fluid.data(name='2', shape=[-1, args.max_seq_len, 1], dtype='int64')
+    pos_ids = fluid.data(name='3', shape=[-1, args.max_seq_len, 1], dtype='int64')
+    input_mask = fluid.data(name='4', shape=[-1, args.max_seq_len, 1], dtype='float32')
+    labels = fluid.data(name='5', shape=[-1, 1], dtype='int64')
+    seq_lens = fluid.data(name='6', shape=[-1], dtype='int64')
+    pyreader = fluid.io.DataLoader.from_generator(
+        feed_list=[src_ids, sent_ids, pos_ids, input_mask, labels, seq_lens],
+        capacity=50,
+        iterable=False,
+        use_double_buffer=True)
     ernie_inputs = {
         "src_ids": src_ids,
         "sent_ids": sent_ids,
...