diff --git a/PaddleNLP/language_representations_kit/BERT/model/classifier.py b/PaddleNLP/language_representations_kit/BERT/model/classifier.py index 186d543184d1bc35722c58484cfee2a5ae983a70..8b9fea2b7171b8e6dd123a4eb27fbf75b563328c 100644 --- a/PaddleNLP/language_representations_kit/BERT/model/classifier.py +++ b/PaddleNLP/language_representations_kit/BERT/model/classifier.py @@ -41,7 +41,8 @@ def create_model(args, bert_config, num_labels, is_prediction=False): ] (src_ids, pos_ids, sent_ids, input_mask, labels) = inputs - pyreader = fluid.io.PyReader(feed_list=inputs, capacity=50, iterable=False) + data_loader = fluid.io.DataLoader.from_generator( + feed_list=inputs, capacity=50, iterable=False) bert = BertModel( src_ids=src_ids, @@ -71,7 +72,7 @@ def create_model(args, bert_config, num_labels, is_prediction=False): feed_targets_name = [ src_ids.name, pos_ids.name, sent_ids.name, input_mask.name ] - return pyreader, probs, feed_targets_name + return data_loader, probs, feed_targets_name logits = fluid.layers.reshape(logits, [-1, num_labels], inplace=True) ce_loss, probs = fluid.layers.softmax_with_cross_entropy( @@ -81,4 +82,4 @@ def create_model(args, bert_config, num_labels, is_prediction=False): num_seqs = fluid.layers.create_tensor(dtype='int64') accuracy = fluid.layers.accuracy(input=probs, label=labels, total=num_seqs) - return pyreader, loss, probs, accuracy, num_seqs + return data_loader, loss, probs, accuracy, num_seqs diff --git a/PaddleNLP/language_representations_kit/BERT/predict_classifier.py b/PaddleNLP/language_representations_kit/BERT/predict_classifier.py index 0f0f842865549b6c6c2ba97dce37f9007e39c697..2e54bc8fb8271c2c1ccfd46180529ab1c7cf9a9c 100644 --- a/PaddleNLP/language_representations_kit/BERT/predict_classifier.py +++ b/PaddleNLP/language_representations_kit/BERT/predict_classifier.py @@ -17,6 +17,12 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import six +import sys +if six.PY2: + reload(sys) + sys.setdefaultencoding('utf8') + import os import time import argparse @@ -82,7 +88,7 @@ def main(args): predict_startup = fluid.Program() with fluid.program_guard(predict_prog, predict_startup): with fluid.unique_name.guard(): - predict_pyreader, probs, feed_target_names = create_model( + predict_data_loader, probs, feed_target_names = create_model( args, bert_config=bert_config, num_labels=num_labels, @@ -112,11 +118,11 @@ def main(args): predict_exe = fluid.ParallelExecutor( use_cuda=args.use_cuda, main_program=predict_prog) - predict_pyreader.decorate_batch_generator( + predict_data_loader.set_batch_generator( processor.data_generator( batch_size=args.batch_size, phase='test', epoch=1, shuffle=False)) - predict_pyreader.start() + predict_data_loader.start() all_results = [] time_begin = time.time() while True: @@ -124,7 +130,7 @@ def main(args): results = predict_exe.run(fetch_list=[probs.name]) all_results.extend(results[0]) except fluid.core.EOFException: - predict_pyreader.reset() + predict_data_loader.reset() break time_end = time.time() diff --git a/PaddleNLP/language_representations_kit/BERT/run_classifier.py b/PaddleNLP/language_representations_kit/BERT/run_classifier.py index 3daa819ba5252b6b37669afea9d8de837ba8d400..1e769d3446da2c92f27ae0f13f31db3b2f30735a 100644 --- a/PaddleNLP/language_representations_kit/BERT/run_classifier.py +++ b/PaddleNLP/language_representations_kit/BERT/run_classifier.py @@ -17,9 +17,11 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import six import sys -reload(sys) -sys.setdefaultencoding('utf8') +if six.PY2: + reload(sys) + sys.setdefaultencoding('utf8') import os import time @@ -107,8 +109,8 @@ args = parser.parse_args() # yapf: enable. -def evaluate(exe, test_program, test_pyreader, fetch_list, eval_phase): - test_pyreader.start() +def evaluate(exe, test_program, test_data_loader, fetch_list, eval_phase): + test_data_loader.start() total_cost, total_acc, total_num_seqs = [], [], [] time_begin = time.time() while True: @@ -119,7 +121,7 @@ def evaluate(exe, test_program, test_pyreader, fetch_list, eval_phase): total_acc.extend(np_acc * np_num_seqs) total_num_seqs.extend(np_num_seqs) except fluid.core.EOFException: - test_pyreader.reset() + test_data_loader.reset() break time_end = time.time() print("[%s evaluation] ave loss: %f, ave acc: %f, elapsed time: %f s" % @@ -203,7 +205,7 @@ def main(args): with fluid.program_guard(train_program, startup_prog): with fluid.unique_name.guard(): - train_pyreader, loss, probs, accuracy, num_seqs = create_model( + train_data_loader, loss, probs, accuracy, num_seqs = create_model( args, bert_config=bert_config, num_labels=num_labels) @@ -228,13 +230,13 @@ def main(args): dev_prog = fluid.Program() with fluid.program_guard(dev_prog, startup_prog): with fluid.unique_name.guard(): - dev_pyreader, loss, probs, accuracy, num_seqs = create_model( + dev_data_loader, loss, probs, accuracy, num_seqs = create_model( args, bert_config=bert_config, num_labels=num_labels) dev_prog = dev_prog.clone(for_test=True) - dev_pyreader.decorate_batch_generator( + dev_data_loader.set_batch_generator( processor.data_generator( batch_size=args.batch_size, phase='dev', @@ -246,13 +248,13 @@ def main(args): test_prog = fluid.Program() with fluid.program_guard(test_prog, startup_prog): with fluid.unique_name.guard(): - test_pyreader, loss, probs, accuracy, num_seqs = create_model( + test_data_loader, loss, probs, accuracy, num_seqs = create_model( args, bert_config=bert_config, num_labels=num_labels) test_prog = test_prog.clone(for_test=True) - test_pyreader.decorate_batch_generator( + test_data_loader.set_batch_generator( processor.data_generator( batch_size=args.batch_size, phase='test', @@ -305,11 +307,11 @@ def main(args): train_compiled_program = fluid.CompiledProgram(train_program).with_data_parallel( loss_name=loss.name, build_strategy=build_strategy) - train_pyreader.decorate_batch_generator(train_data_generator, place) + train_data_loader.set_batch_generator(train_data_generator, place) if args.do_train: - train_pyreader.start() + train_data_loader.start() steps = 0 total_cost, total_acc, total_num_seqs = [], [], [] time_begin = time.time() @@ -339,7 +341,7 @@ def main(args): total_num_seqs.extend(np_num_seqs) if args.verbose: - verbose = "train pyreader queue size: %d, " % train_pyreader.queue.size( + verbose = "train data_loader queue size: %d, " % train_data_loader.queue.size( ) verbose += "learning rate: %f" % np_lr[0] if args.use_fp16: @@ -375,18 +377,18 @@ def main(args): throughput = [] # evaluate dev set if args.do_val: - evaluate(exe, dev_prog, dev_pyreader, + evaluate(exe, dev_prog, dev_data_loader, [loss.name, accuracy.name, num_seqs.name], "dev") # evaluate test set if args.do_test: - evaluate(exe, test_prog, test_pyreader, + evaluate(exe, test_prog, test_data_loader, [loss.name, accuracy.name, num_seqs.name], "test") except fluid.core.EOFException: save_path = os.path.join(args.checkpoints, "step_" + str(steps)) fluid.io.save_persistables(exe, save_path, train_program) - train_pyreader.reset() + train_data_loader.reset() break if args.enable_ce: card_num = get_cards() @@ -410,13 +412,13 @@ def main(args): # final eval on dev set if args.do_val: print("Final validation result:") - evaluate(exe, dev_prog, dev_pyreader, + evaluate(exe, dev_prog, dev_data_loader, [loss.name, accuracy.name, num_seqs.name], "dev") # final eval on test set if args.do_test: print("Final test result:") - evaluate(exe, test_prog, test_pyreader, + evaluate(exe, test_prog, test_data_loader, [loss.name, accuracy.name, num_seqs.name], "test") diff --git a/PaddleNLP/language_representations_kit/BERT/run_squad.py b/PaddleNLP/language_representations_kit/BERT/run_squad.py index 7ae937a3e9a298d7003f6aa5fecbf9afefbcc8f5..af0a706ca4296cb4f5899aee205288b01825bb00 100644 --- a/PaddleNLP/language_representations_kit/BERT/run_squad.py +++ b/PaddleNLP/language_representations_kit/BERT/run_squad.py @@ -17,9 +17,11 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import six import sys -reload(sys) -sys.setdefaultencoding('utf8') +if six.PY2: + reload(sys) + sys.setdefaultencoding('utf8') import argparse import collections @@ -129,7 +131,7 @@ def create_model(bert_config, is_training=False): dtype=input_fields['dtypes'][i], lod_level=input_fields['lod_levels'][i]) for i in range(len(input_fields['names']))] - pyreader = fluid.io.PyReader(feed_list=inputs, capacity=50, iterable=False) + data_loader = fluid.io.DataLoader.from_generator(feed_list=inputs, capacity=50, iterable=False) if is_training: (src_ids, pos_ids, sent_ids, input_mask, start_positions, end_positions) = inputs @@ -174,23 +176,23 @@ def create_model(bert_config, is_training=False): start_loss = compute_loss(start_logits, start_positions) end_loss = compute_loss(end_logits, end_positions) total_loss = (start_loss + end_loss) / 2.0 - return pyreader, total_loss, num_seqs + return data_loader, total_loss, num_seqs else: - return pyreader, unique_id, start_logits, end_logits, num_seqs + return data_loader, unique_id, start_logits, end_logits, num_seqs RawResult = collections.namedtuple("RawResult", ["unique_id", "start_logits", "end_logits"]) -def predict(test_exe, test_program, test_pyreader, fetch_list, processor): +def predict(test_exe, test_program, test_data_loader, fetch_list, processor): if not os.path.exists(args.checkpoints): os.makedirs(args.checkpoints) output_prediction_file = os.path.join(args.checkpoints, "predictions.json") output_nbest_file = os.path.join(args.checkpoints, "nbest_predictions.json") output_null_log_odds_file = os.path.join(args.checkpoints, "null_odds.json") - test_pyreader.start() + test_data_loader.start() all_results = [] time_begin = time.time() while True: @@ -209,7 +211,7 @@ def predict(test_exe, test_program, test_pyreader, fetch_list, processor): start_logits=start_logits, end_logits=end_logits)) except fluid.core.EOFException: - test_pyreader.reset() + test_data_loader.reset() break time_end = time.time() @@ -277,7 +279,7 @@ def train(args): train_program = fluid.Program() with fluid.program_guard(train_program, startup_prog): with fluid.unique_name.guard(): - train_pyreader, loss, num_seqs = create_model( + train_data_loader, loss, num_seqs = create_model( bert_config=bert_config, is_training=True) @@ -302,7 +304,7 @@ def train(args): test_prog = fluid.Program() with fluid.program_guard(test_prog, startup_prog): with fluid.unique_name.guard(): - test_pyreader, unique_ids, start_logits, end_logits, num_seqs = create_model( + test_data_loader, unique_ids, start_logits, end_logits, num_seqs = create_model( bert_config=bert_config, is_training=False) @@ -346,9 +348,9 @@ def train(args): train_compiled_program = fluid.CompiledProgram(train_program).with_data_parallel( loss_name=loss.name, exec_strategy=exec_strategy) - train_pyreader.decorate_batch_generator(train_data_generator, place) + train_data_loader.set_batch_generator(train_data_generator, place) - train_pyreader.start() + train_data_loader.start() steps = 0 total_cost, total_num_seqs = [], [] time_begin = time.time() @@ -374,7 +376,7 @@ def train(args): total_num_seqs.extend(np_num_seqs) if args.verbose: - verbose = "train pyreader queue size: %d, " % train_pyreader.queue.size( + verbose = "train data_loader queue size: %d, " % train_data_loader.queue.size( ) verbose += "learning rate: %f " % np_lr[0] if args.use_fp16: @@ -401,11 +403,11 @@ def train(args): save_path = os.path.join(args.checkpoints, "step_" + str(steps) + "_final") fluid.io.save_persistables(exe, save_path, train_program) - train_pyreader.reset() + train_data_loader.reset() break if args.do_predict: - test_pyreader.decorate_batch_generator( + test_data_loader.set_batch_generator( processor.data_generator( data_path=args.predict_file, batch_size=args.batch_size, @@ -414,7 +416,7 @@ def train(args): dev_count=1, epoch=1), place) - predict(exe, test_prog, test_pyreader, [ + predict(exe, test_prog, test_data_loader, [ unique_ids.name, start_logits.name, end_logits.name, num_seqs.name ], processor) diff --git a/PaddleNLP/language_representations_kit/BERT/train.py b/PaddleNLP/language_representations_kit/BERT/train.py index 6bc985c4d4c9b28794f74b17628ecff0dfb3800d..686e5281a11795ed7f08c6ecf9a5662cf809e112 100644 --- a/PaddleNLP/language_representations_kit/BERT/train.py +++ b/PaddleNLP/language_representations_kit/BERT/train.py @@ -17,13 +17,14 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import six import sys -reload(sys) -sys.setdefaultencoding('utf8') +if six.PY2: + reload(sys) + sys.setdefaultencoding('utf8') import os import time -import sys import argparse import numpy as np import multiprocessing @@ -111,7 +112,7 @@ def create_model(bert_config): (src_ids, pos_ids, sent_ids, input_mask, mask_label, mask_pos, labels) = inputs - pyreader = fluid.io.PyReader(feed_list=inputs, capacity=50, iterable=False) + data_loader = fluid.io.DataLoader.from_generator(feed_list=inputs, capacity=50, iterable=False) bert = BertModel( src_ids=src_ids, @@ -125,14 +126,14 @@ def create_model(bert_config): next_sent_acc, mask_lm_loss, total_loss = bert.get_pretraining_output( mask_label, mask_pos, labels) - return pyreader, next_sent_acc, mask_lm_loss, total_loss + return data_loader, next_sent_acc, mask_lm_loss, total_loss def predict_wrapper(args, exe, bert_config, test_prog=None, - pyreader=None, + data_loader=None, fetch_list=None): # Context to do validation. data_path = args.test_set_dir if args.do_test else args.validation_set_dir @@ -147,7 +148,7 @@ def predict_wrapper(args, max_seq_len=args.max_seq_len, is_test=True) - pyreader.decorate_batch_generator(data_reader.data_generator()) + data_loader.set_batch_generator(data_reader.data_generator()) if args.do_test: assert args.init_checkpoint is not None, "[FATAL] Please use --init_checkpoint '/path/to/checkpoints' \ @@ -155,8 +156,8 @@ def predict_wrapper(args, init_pretraining_params(exe, args.init_checkpoint, test_prog) - def predict(exe=exe, pyreader=pyreader): - pyreader.start() + def predict(exe=exe, data_loader=data_loader): + data_loader.start() cost = 0 lm_cost = 0 @@ -175,7 +176,7 @@ def predict_wrapper(args, print("[test_set] steps: %d" % steps) except fluid.core.EOFException: - pyreader.reset() + data_loader.reset() break used_time = time.time() - time_begin @@ -192,7 +193,7 @@ def test(args): test_startup = fluid.Program() with fluid.program_guard(test_prog, test_startup): with fluid.unique_name.guard(): - test_pyreader, next_sent_acc, mask_lm_loss, total_loss = create_model( + test_data_loader, next_sent_acc, mask_lm_loss, total_loss = create_model( bert_config=bert_config) test_prog = test_prog.clone(for_test=True) @@ -206,7 +207,7 @@ def test(args): exe, bert_config, test_prog=test_prog, - pyreader=test_pyreader, + data_loader=test_data_loader, fetch_list=[next_sent_acc.name, mask_lm_loss.name, total_loss.name]) print("test begin") @@ -227,7 +228,7 @@ def train(args): startup_prog = fluid.Program() with fluid.program_guard(train_program, startup_prog): with fluid.unique_name.guard(): - train_pyreader, next_sent_acc, mask_lm_loss, total_loss = create_model( + train_data_loader, next_sent_acc, mask_lm_loss, total_loss = create_model( bert_config=bert_config) scheduled_lr, loss_scaling = optimization( loss=total_loss, @@ -249,7 +250,7 @@ def train(args): test_prog = fluid.Program() with fluid.program_guard(test_prog, startup_prog): with fluid.unique_name.guard(): - test_pyreader, next_sent_acc, mask_lm_loss, total_loss = create_model( + test_data_loader, next_sent_acc, mask_lm_loss, total_loss = create_model( bert_config=bert_config) test_prog = test_prog.clone(for_test=True) @@ -334,13 +335,13 @@ def train(args): exe, bert_config, test_prog=test_prog, - pyreader=test_pyreader, + data_loader=test_data_loader, fetch_list=[ next_sent_acc.name, mask_lm_loss.name, total_loss.name ]) - train_pyreader.decorate_batch_generator(data_reader.data_generator()) - train_pyreader.start() + train_data_loader.set_batch_generator(data_reader.data_generator()) + train_data_loader.start() steps = 0 cost = [] lm_cost = [] @@ -391,7 +392,7 @@ def train(args): epoch, current_file_index, total_file, current_file = data_reader.get_progress( ) if args.verbose: - verbose = "feed_queue size: %d, " %train_pyreader.queue.size() + verbose = "feed_queue size: %d, " %train_data_loader.queue.size() verbose += "current learning_rate: %f, " % np_lr[0] if args.use_fp16: verbose += "loss scaling: %f" % np_scaling[0] @@ -426,7 +427,7 @@ def train(args): np.mean(np.array(vali_acc) / vali_steps), vali_speed)) except fluid.core.EOFException: - train_pyreader.reset() + train_data_loader.reset() break if __name__ == '__main__': diff --git a/PaddleNLP/language_representations_kit/BERT/utils/fp16.py b/PaddleNLP/language_representations_kit/BERT/utils/fp16.py index 740add267dff2dbf463032bcc47a6741ca9f7c43..c68e1aceeeafdba34516769e380ca702c752946f 100644 --- a/PaddleNLP/language_representations_kit/BERT/utils/fp16.py +++ b/PaddleNLP/language_representations_kit/BERT/utils/fp16.py @@ -119,10 +119,11 @@ def create_master_params_grads(params_grads, main_prog, startup_prog, def master_param_to_train_param(master_params_grads, params_grads, main_prog): for idx, m_p_g in enumerate(master_params_grads): train_p, _ = params_grads[idx] - if train_p.name.find("layer_norm") > -1: - continue with main_prog._optimized_guard([m_p_g[0], m_p_g[1]]): - append_cast_op(m_p_g[0], train_p, main_prog) + if train_p.name.find("layer_norm") > -1: + fluid.layers.assign(m_p_g[0], train_p) + else: + append_cast_op(m_p_g[0], train_p, main_prog) def update_loss_scaling(is_overall_finite, prev_loss_scaling, num_good_steps,