From 588d8dc4fe51824c13c86ff4ba0308b455a93548 Mon Sep 17 00:00:00 2001
From: bbking
Date: Wed, 16 Oct 2019 15:37:58 +0800
Subject: [PATCH] update PaddleNLP emotion_detection and ernie for Release/1.6 (#3608)

* emotion-detection => 1.6

* ERNIE => 1.6

* [PaddleNLP] update emotion_detection readme
---
 PaddleNLP/emotion_detection/README.md         |  4 +-
 PaddleNLP/emotion_detection/download_model.sh |  4 +-
 .../emotion_detection/inference_model.py      | 21 +------
 PaddleNLP/emotion_detection/reader.py         |  8 +--
 PaddleNLP/emotion_detection/run_classifier.py | 58 +++++++++----------
 .../emotion_detection/run_ernie_classifier.py |  8 +--
 PaddleNLP/models/classification/nets.py       | 12 ++--
 PaddleNLP/models/model_check.py               | 15 +++++
 PaddleNLP/models/representation/ernie.py      | 20 +++----
 9 files changed, 74 insertions(+), 76 deletions(-)

diff --git a/PaddleNLP/emotion_detection/README.md b/PaddleNLP/emotion_detection/README.md
index 4bafa5ee..bc0ee7c4 100644
--- a/PaddleNLP/emotion_detection/README.md
+++ b/PaddleNLP/emotion_detection/README.md
@@ -33,7 +33,7 @@
 1. PaddlePaddle 安装
 
-   本项目依赖于 PaddlePaddle Fluid 1.3.2 及以上版本，请参考 [安装指南](http://www.paddlepaddle.org/#quick-start) 进行安装
+   本项目依赖于 PaddlePaddle Fluid 1.6 及以上版本，请参考 [安装指南](http://www.paddlepaddle.org/#quick-start) 进行安装
 
 2. 代码安装
 
@@ -46,7 +46,7 @@
 
 3. 环境依赖
 
-   请参考 PaddlePaddle [安装说明](https://www.paddlepaddle.org.cn/documentation/docs/zh/1.5/beginners_guide/install/index_cn.html) 部分的内容
+   Python 2 的版本要求 2.7.15+，Python 3 的版本要求 3.5.1+/3.6/3.7，其它环境请参考 PaddlePaddle [安装说明](https://www.paddlepaddle.org.cn/documentation/docs/zh/1.5/beginners_guide/install/index_cn.html) 部分的内容
 
 ### 代码结构说明
diff --git a/PaddleNLP/emotion_detection/download_model.sh b/PaddleNLP/emotion_detection/download_model.sh
index 2451aff6..230b5c4f 100644
--- a/PaddleNLP/emotion_detection/download_model.sh
+++ b/PaddleNLP/emotion_detection/download_model.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 
-mkdir -p models
-cd models
+mkdir -p pretrain_models
+cd pretrain_models
 
 # download pretrain model file to ./models/
 MODEL_CNN=https://baidu-nlp.bj.bcebos.com/emotion_detection_textcnn-1.0.0.tar.gz
diff --git a/PaddleNLP/emotion_detection/inference_model.py b/PaddleNLP/emotion_detection/inference_model.py
index cf572e64..542aa9bd 100644
--- a/PaddleNLP/emotion_detection/inference_model.py
+++ b/PaddleNLP/emotion_detection/inference_model.py
@@ -44,9 +44,8 @@ def do_save_inference_model(args):
 
     with fluid.program_guard(test_prog, startup_prog):
         with fluid.unique_name.guard():
-            infer_pyreader, probs, feed_target_names = create_model(
+            infer_loader, probs, feed_target_names = create_model(
                 args,
-                pyreader_name='infer_reader',
                 num_labels=args.num_labels,
                 is_prediction=True)
 
@@ -79,20 +78,7 @@ def test_inference_model(args, texts):
         dev_count = int(os.environ.get('CPU_NUM', 1))
         place = fluid.CPUPlace()
 
-    test_prog = fluid.default_main_program()
-    startup_prog = fluid.default_startup_program()
-
-    with fluid.program_guard(test_prog, startup_prog):
-        with fluid.unique_name.guard():
-            infer_pyreader, probs, feed_target_names = create_model(
-                args,
-                pyreader_name='infer_reader',
-                num_labels=args.num_labels,
-                is_prediction=True)
-
-    test_prog = test_prog.clone(for_test=True)
     exe = fluid.Executor(place)
-    exe.run(startup_prog)
 
     assert (args.inference_model_dir)
     infer_program, feed_names, fetch_targets = fluid.io.load_inference_model(
@@ -107,9 +93,8 @@
         wids, seq_len = utils.pad_wid(wids)
         data.append(wids)
         seq_lens.append(seq_len)
-    batch_size = len(data)
-    data = np.array(data).reshape((batch_size, 128, 1))
-    seq_lens = np.array(seq_lens).reshape((batch_size, 1))
+    data = np.array(data)
+    seq_lens = np.array(seq_lens)
 
     pred = exe.run(infer_program,
                    feed={
diff --git a/PaddleNLP/emotion_detection/reader.py b/PaddleNLP/emotion_detection/reader.py
index f0e7efd6..3adb8d01 100644
--- a/PaddleNLP/emotion_detection/reader.py
+++ b/PaddleNLP/emotion_detection/reader.py
@@ -96,16 +96,16 @@ class EmoTectProcessor(object):
         Generate data for train, dev or test
         """
         if phase == "train":
-            return paddle.batch(
+            return fluid.io.batch(
                 self.get_train_examples(self.data_dir, epoch, self.max_seq_len), batch_size)
         elif phase == "dev":
-            return paddle.batch(
+            return fluid.io.batch(
                 self.get_dev_examples(self.data_dir, epoch, self.max_seq_len), batch_size)
         elif phase == "test":
-            return paddle.batch(
+            return fluid.io.batch(
                 self.get_test_examples(self.data_dir, epoch, self.max_seq_len), batch_size)
         elif phase == "infer":
-            return paddle.batch(
+            return fluid.io.batch(
                 self.get_infer_examples(self.data_dir, epoch, self.max_seq_len), batch_size)
         else:
             raise ValueError(
diff --git a/PaddleNLP/emotion_detection/run_classifier.py b/PaddleNLP/emotion_detection/run_classifier.py
index 99bf0471..9e3e9a82 100644
--- a/PaddleNLP/emotion_detection/run_classifier.py
+++ b/PaddleNLP/emotion_detection/run_classifier.py
@@ -32,30 +32,30 @@ import numpy as np
 
 from models.classification import nets
 from models.model_check import check_cuda
+from models.model_check import check_version
 from config import PDConfig
 import reader
 import utils
 
 
 def create_model(args,
-                 pyreader_name,
                  num_labels,
                  is_prediction=False):
     """
     Create Model for Emotion Detection
     """
-    data = fluid.layers.data(name="words", shape=[-1, args.max_seq_len, 1], dtype="int64")
-    label = fluid.layers.data(name="label", shape=[-1, 1], dtype="int64")
-    seq_len = fluid.layers.data(name="seq_len", shape=[-1, 1], dtype="int64")
+    data = fluid.data(name="words", shape=[-1, args.max_seq_len], dtype="int64")
+    label = fluid.data(name="label", shape=[-1, 1], dtype="int64")
+    seq_len = fluid.data(name="seq_len", shape=[-1], dtype="int64")
 
     if is_prediction:
-        pyreader = fluid.io.PyReader(
+        loader = fluid.io.DataLoader.from_generator(
             feed_list=[data, seq_len],
             capacity=16,
             iterable=False,
             return_list=False)
     else:
-        pyreader = fluid.io.PyReader(
+        loader = fluid.io.DataLoader.from_generator(
             feed_list=[data, label, seq_len],
             capacity=16,
             iterable=False,
@@ -78,19 +78,19 @@ def create_model(args,
 
     if is_prediction:
         probs = network(data, seq_len, None, args.vocab_size, class_dim=num_labels, is_prediction=True)
-        return pyreader, probs, [data.name, seq_len.name]
+        return loader, probs, [data.name, seq_len.name]
 
     avg_loss, probs = network(data, seq_len, label, args.vocab_size, class_dim=num_labels)
     num_seqs = fluid.layers.create_tensor(dtype='int64')
     accuracy = fluid.layers.accuracy(input=probs, label=label, total=num_seqs)
-    return pyreader, avg_loss, accuracy, num_seqs
+    return loader, avg_loss, accuracy, num_seqs
 
 
-def evaluate(exe, test_program, test_pyreader, fetch_list, eval_phase):
+def evaluate(exe, test_program, test_loader, fetch_list, eval_phase):
     """
     Evaluation Function
     """
-    test_pyreader.start()
+    test_loader.start()
     total_cost, total_acc, total_num_seqs = [], [], []
     time_begin = time.time()
     while True:
@@ -105,7 +105,7 @@
             total_acc.extend(np_acc * np_num_seqs)
             total_num_seqs.extend(np_num_seqs)
         except fluid.core.EOFException:
-            test_pyreader.reset()
+            test_loader.reset()
             break
     time_end = time.time()
     print("[%s evaluation] avg loss: %f, avg acc: %f, elapsed time: %f s" %
@@ -113,8 +113,8 @@
           np.sum(total_acc) / np.sum(total_num_seqs), time_end - time_begin))
 
 
-def infer(exe, infer_program, infer_pyreader, fetch_list, infer_phase):
-    infer_pyreader.start()
+def infer(exe, infer_program, infer_loader, fetch_list, infer_phase):
+    infer_loader.start()
     time_begin = time.time()
     while True:
         try:
@@ -125,7 +125,7 @@
             print("%d\t%f\t%f\t%f" % (np.argmax(probs), probs[0], probs[1], probs[2]))
         except fluid.core.EOFException as e:
-            infer_pyreader.reset()
+            infer_loader.reset()
             break
     time_end = time.time()
     print("[%s] elapsed time: %f s" % (infer_phase, time_end - time_begin))
@@ -172,9 +172,8 @@
 
     with fluid.program_guard(train_program, startup_prog):
         with fluid.unique_name.guard():
-            train_pyreader, loss, accuracy, num_seqs = create_model(
+            train_loader, loss, accuracy, num_seqs = create_model(
                 args,
-                pyreader_name='train_reader',
                 num_labels=num_labels,
                 is_prediction=False)
 
@@ -202,9 +201,8 @@
         test_prog = fluid.Program()
         with fluid.program_guard(test_prog, startup_prog):
             with fluid.unique_name.guard():
-                test_pyreader, loss, accuracy, num_seqs = create_model(
+                test_loader, loss, accuracy, num_seqs = create_model(
                     args,
-                    pyreader_name='test_reader',
                     num_labels=num_labels,
                     is_prediction=False)
         test_prog = test_prog.clone(for_test=True)
@@ -218,9 +216,8 @@
         test_prog = fluid.Program()
         with fluid.program_guard(test_prog, startup_prog):
             with fluid.unique_name.guard():
-                infer_pyreader, probs, _ = create_model(
+                infer_loader, probs, _ = create_model(
                     args,
-                    pyreader_name='infer_reader',
                     num_labels=num_labels,
                     is_prediction=True)
         test_prog = test_prog.clone(for_test=True)
@@ -239,18 +236,18 @@
 
     if args.do_train:
         train_exe = exe
-        train_pyreader.decorate_sample_list_generator(train_data_generator)
+        train_loader.set_sample_list_generator(train_data_generator)
     else:
         train_exe = None
     if args.do_val:
         test_exe = exe
-        test_pyreader.decorate_sample_list_generator(test_data_generator)
+        test_loader.set_sample_list_generator(test_data_generator)
     if args.do_infer:
         test_exe = exe
-        infer_pyreader.decorate_sample_list_generator(infer_data_generator)
+        infer_loader.set_sample_list_generator(infer_data_generator)
 
     if args.do_train:
-        train_pyreader.start()
+        train_loader.start()
         steps = 0
         total_cost, total_acc, total_num_seqs = [], [], []
         time_begin = time.time()
@@ -276,7 +273,7 @@
                     total_num_seqs.extend(np_num_seqs)
 
                 if args.verbose:
-                    verbose = "train pyreader queue size: %d, " % train_pyreader.queue.size(
+                    verbose = "train loader queue size: %d, " % train_loader.queue.size(
                     )
                     print(verbose)
@@ -301,20 +298,20 @@
                 if steps % args.validation_steps == 0:
                     # evaluate on dev set
                     if args.do_val:
-                        evaluate(test_exe, test_prog, test_pyreader,
+                        evaluate(test_exe, test_prog, test_loader,
                                  [loss.name, accuracy.name, num_seqs.name], "dev")
         except fluid.core.EOFException:
             print("final step: %d " % steps)
             if args.do_val:
-                evaluate(test_exe, test_prog, test_pyreader,
+                evaluate(test_exe, test_prog, test_loader,
                          [loss.name, accuracy.name, num_seqs.name], "dev")
             save_path = os.path.join(args.save_checkpoint_dir, "step_" + str(steps))
             fluid.io.save_persistables(exe, save_path, train_program)
-            train_pyreader.reset()
+            train_loader.reset()
             break
 
     if args.do_train and args.enable_ce:
@@ -336,14 +333,14 @@ def main(args):
 
     # evaluate on test set
     if not args.do_train and args.do_val:
         print("Final test result:")
-        evaluate(test_exe, test_prog, test_pyreader,
+        evaluate(test_exe, test_prog, test_loader,
                  [loss.name, accuracy.name, num_seqs.name], "test")
 
     # infer
     if args.do_infer:
         print("Final infer result:")
-        infer(test_exe, test_prog, infer_pyreader,
+        infer(test_exe, test_prog, infer_loader,
               [probs.name], "infer")
@@ -361,4 +358,5 @@ if __name__ == "__main__":
     args.build()
     args.print_arguments()
     check_cuda(args.use_cuda)
+    check_version()
     main(args)
diff --git a/PaddleNLP/emotion_detection/run_ernie_classifier.py b/PaddleNLP/emotion_detection/run_ernie_classifier.py
index 110e21ee..2acdaa53 100644
--- a/PaddleNLP/emotion_detection/run_ernie_classifier.py
+++ b/PaddleNLP/emotion_detection/run_ernie_classifier.py
@@ -305,7 +305,7 @@ def main(args):
 
     if args.do_train:
         train_exe = exe
-        train_pyreader.decorate_tensor_provider(train_data_generator)
+        train_pyreader.set_batch_generator(train_data_generator)
    else:
        train_exe = None
    if args.do_val or args.do_infer:
@@ -355,7 +355,7 @@
                 if steps % args.validation_steps == 0:
                     # evaluate dev set
                     if args.do_val:
-                        test_pyreader.decorate_tensor_provider(
+                        test_pyreader.set_batch_generator(
                             reader.data_generator(
                                 input_file=args.dev_set,
                                 batch_size=args.batch_size,
@@ -375,7 +375,7 @@
 
     # eval on test set
     if not args.do_train and args.do_val:
-        test_pyreader.decorate_tensor_provider(
+        test_pyreader.set_batch_generator(
             reader.data_generator(
                 input_file=args.test_set,
                 batch_size=args.batch_size, phase='test', epoch=1,
@@ -386,7 +386,7 @@
 
     # infer on infer set
     if args.do_infer:
-        infer_pyreader.decorate_tensor_provider(
+        infer_pyreader.set_batch_generator(
             reader.data_generator(
                 input_file=args.infer_set,
                 batch_size=args.batch_size,
diff --git a/PaddleNLP/models/classification/nets.py b/PaddleNLP/models/classification/nets.py
index fbc5e275..c66b3927 100644
--- a/PaddleNLP/models/classification/nets.py
+++ b/PaddleNLP/models/classification/nets.py
@@ -17,7 +17,7 @@ def bow_net(data,
     Bow net
     """
     # embedding layer
-    emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim])
+    emb = fluid.embedding(input=data, size=[dict_dim, emb_dim])
     emb = fluid.layers.sequence_unpad(emb, length=seq_len)
     # bow layer
     bow = fluid.layers.sequence_pool(input=emb, pool_type='sum')
@@ -50,7 +50,7 @@ def cnn_net(data,
     Conv net
     """
     # embedding layer
-    emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim])
+    emb = fluid.embedding(input=data, size=[dict_dim, emb_dim])
     emb = fluid.layers.sequence_unpad(emb, length=seq_len)
     # convolution layer
     conv_3 = fluid.nets.sequence_conv_pool(
@@ -87,7 +87,7 @@ def lstm_net(data,
     Lstm net
     """
     # embedding layer
-    emb = fluid.layers.embedding(
+    emb = fluid.embedding(
         input=data,
         size=[dict_dim, emb_dim],
         param_attr=fluid.ParamAttr(learning_rate=emb_lr))
@@ -129,7 +129,7 @@ def bilstm_net(data,
     Bi-Lstm net
     """
     # embedding layer
-    emb = fluid.layers.embedding(
+    emb = fluid.embedding(
         input=data,
         size=[dict_dim, emb_dim],
         param_attr=fluid.ParamAttr(learning_rate=emb_lr))
@@ -175,7 +175,7 @@ def gru_net(data,
     """
     gru net
     """
-    emb = fluid.layers.embedding(
+    emb = fluid.embedding(
         input=data,
         size=[dict_dim, emb_dim],
         param_attr=fluid.ParamAttr(learning_rate=emb_lr))
@@ -216,7 +216,7 @@ def textcnn_net(data,
     win_sizes = [1, 2, 3]
 
     # embedding layer
-    emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim])
+    emb = fluid.embedding(input=data, size=[dict_dim, emb_dim])
     emb = fluid.layers.sequence_unpad(emb, length=seq_len)
     # convolution layer
     convs = []
diff --git a/PaddleNLP/models/model_check.py b/PaddleNLP/models/model_check.py
index af0612fd..135ababe 100644
--- a/PaddleNLP/models/model_check.py
+++ b/PaddleNLP/models/model_check.py
@@ -29,6 +29,21 @@ def check_cuda(use_cuda, err = \
     except Exception as e:
         pass
 
+def check_version():
+    """
+    Log error and exit when the installed version of paddlepaddle is
+    not satisfied.
+    """
+    err = "PaddlePaddle version 1.6 or higher is required, " \
+          "or a suitable develop version is satisfied as well. \n" \
+          "Please make sure the version is good with your code." \
+
+    try:
+        fluid.require_version('1.6.0')
+    except Exception as e:
+        print(err)
+        sys.exit(1)
+
 
 if __name__ == "__main__":
     check_cuda(True)
diff --git a/PaddleNLP/models/representation/ernie.py b/PaddleNLP/models/representation/ernie.py
index 69831ef2..a12c483f 100644
--- a/PaddleNLP/models/representation/ernie.py
+++ b/PaddleNLP/models/representation/ernie.py
@@ -30,19 +30,19 @@ from models.transformer_encoder import encoder, pre_process_layer
 
 def ernie_pyreader(args, pyreader_name):
     """define standard ernie pyreader"""
-    pyreader = fluid.layers.py_reader(
+    src_ids = fluid.data(name='1', shape=[-1, args.max_seq_len, 1], dtype='int64')
+    sent_ids = fluid.data(name='2', shape=[-1, args.max_seq_len, 1], dtype='int64')
+    pos_ids = fluid.data(name='3', shape=[-1, args.max_seq_len, 1], dtype='int64')
+    input_mask = fluid.data(name='4', shape=[-1, args.max_seq_len, 1], dtype='float32')
+    labels = fluid.data(name='5', shape=[-1, 1], dtype='int64')
+    seq_lens = fluid.data(name='6', shape=[-1], dtype='int64')
+
+    pyreader = fluid.io.DataLoader.from_generator(
+        feed_list=[src_ids, sent_ids, pos_ids, input_mask, labels, seq_lens],
         capacity=50,
-        shapes=[[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
-                [-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1], [-1, 1],
-                [-1, 1]],
-        dtypes=['int64', 'int64', 'int64', 'float32', 'int64', 'int64'],
-        lod_levels=[0, 0, 0, 0, 0, 0],
-        name=pyreader_name,
+        iterable=False,
         use_double_buffer=True)
 
-    (src_ids, sent_ids, pos_ids, input_mask, labels,
-     seq_lens) = fluid.layers.read_file(pyreader)
-
     ernie_inputs = {
         "src_ids": src_ids,
         "sent_ids": sent_ids,
--
GitLab
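
A minimal sketch of the feeding pattern this patch migrates to, for readers following the diff: fluid.data plus fluid.io.DataLoader.from_generator replaces fluid.layers.data plus PyReader/py_reader, and decorate_sample_list_generator becomes set_sample_list_generator. The toy bag-of-words model, the constants, and the random batch generator below are illustrative placeholders assuming PaddlePaddle Fluid 1.6, not code taken from this commit.

# Sketch of the Fluid 1.6 DataLoader feeding pattern applied throughout the patch.
import numpy as np
import paddle.fluid as fluid

MAX_SEQ_LEN = 128   # placeholder, mirrors args.max_seq_len
VOCAB_SIZE = 1000   # placeholder, mirrors args.vocab_size
NUM_LABELS = 3

main_prog, startup_prog = fluid.Program(), fluid.Program()
with fluid.program_guard(main_prog, startup_prog):
    # fluid.data drops the trailing ", 1" dimension that fluid.layers.data used.
    words = fluid.data(name="words", shape=[-1, MAX_SEQ_LEN], dtype="int64")
    label = fluid.data(name="label", shape=[-1, 1], dtype="int64")
    seq_len = fluid.data(name="seq_len", shape=[-1], dtype="int64")

    # DataLoader.from_generator is the 1.6 replacement for fluid.io.PyReader.
    loader = fluid.io.DataLoader.from_generator(
        feed_list=[words, label, seq_len], capacity=16, iterable=False)

    # Toy bag-of-words classifier standing in for nets.textcnn_net and friends.
    emb = fluid.embedding(input=words, size=[VOCAB_SIZE, 32])
    emb = fluid.layers.sequence_unpad(emb, length=seq_len)
    bow = fluid.layers.sequence_pool(input=emb, pool_type="sum")
    probs = fluid.layers.fc(input=bow, size=NUM_LABELS, act="softmax")
    loss = fluid.layers.mean(fluid.layers.cross_entropy(input=probs, label=label))


def random_batches(num_batches=5, batch_size=4):
    """Yield lists of (word_ids, label, seq_len) samples, one list per batch."""
    for _ in range(num_batches):
        batch = []
        for _ in range(batch_size):
            length = np.random.randint(5, MAX_SEQ_LEN)
            ids = np.zeros(MAX_SEQ_LEN, dtype="int64")
            ids[:length] = np.random.randint(1, VOCAB_SIZE, size=length)
            batch.append((ids,
                          np.random.randint(0, NUM_LABELS, size=1).astype("int64"),
                          np.int64(length)))
        yield batch


place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(startup_prog)

# decorate_sample_list_generator is renamed set_sample_list_generator in 1.6.
loader.set_sample_list_generator(random_batches)

loader.start()  # non-iterable loaders are started and reset explicitly
while True:
    try:
        # Forward pass only; an optimizer would be added the same way as before.
        loss_val, = exe.run(program=main_prog, fetch_list=[loss.name])
    except fluid.core.EOFException:  # raised when the generator is exhausted
        loader.reset()
        break

The start()/run()/EOFException/reset() driving loop is unchanged relative to the old PyReader code, which is why the patch only renames the handles and the decorate_* calls in run_classifier.py and run_ernie_classifier.py.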