From 9e5cfbf877c4443f3fe3b18c90d35ba76af1c6f3 Mon Sep 17 00:00:00 2001 From: Li Fuchen Date: Fri, 25 Oct 2019 17:02:53 +0800 Subject: [PATCH] Cherry pick from 1.6 to develop (#3765) * Use new save/load api and change fluid.layers.data to fluid.data (#3547) * Use new save/load api and change fluid.layers.data to fluid.data * add check for paddle version * Use new save/load api in ptb_lm (#3546) * Use new save/load api in ptb_lm * add check for paddle version * unify reader to dataloader (#3488) unify reader to dataloader * Use new save/load api and change fluid.layers.data to fluid.data (#3547) * Use new save/load api and change fluid.layers.data to fluid.data * add check for paddle version --- AutoDL | 1 - PaddleNLP/language_model/README.md | 2 + PaddleNLP/language_model/args.py | 9 +++- PaddleNLP/language_model/run.sh | 1 + PaddleNLP/language_model/train.py | 46 +++++++++------- PaddleNLP/models/language_model/lm_model.py | 46 +++++++--------- PaddleNLP/models/model_check.py | 36 +++++++++++++ dygraph/ptb_lm/args.py | 10 ++++ dygraph/ptb_lm/model_check.py | 58 +++++++++++++++++++++ dygraph/ptb_lm/ptb_dy.py | 32 +++++++++--- 10 files changed, 185 insertions(+), 56 deletions(-) delete mode 160000 AutoDL create mode 100644 dygraph/ptb_lm/model_check.py diff --git a/AutoDL b/AutoDL deleted file mode 160000 index 5447bcf7..00000000 --- a/AutoDL +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 5447bcf72d92b58e9efe38e4aa0d47bab68bec31 diff --git a/PaddleNLP/language_model/README.md b/PaddleNLP/language_model/README.md index 6a989d49..10b882a7 100644 --- a/PaddleNLP/language_model/README.md +++ b/PaddleNLP/language_model/README.md @@ -5,6 +5,8 @@ ## 1. 任务说明 本文主要介绍基于lstm的语言的模型的实现,给定一个输入词序列(中文分词、英文tokenize),计算其ppl(语言模型困惑度,用户表示句子的流利程度),基于循环神经网络语言模型的介绍可以[参阅论文](https://arxiv.org/abs/1409.2329)。相对于传统的方法,基于循环神经网络的方法能够更好的解决稀疏词的问题。 +**目前语言模型要求使用PaddlePaddle 1.6及以上版本或适当的develop版本。** + 同时推荐用户参考[IPython Notebook demo](https://aistudio.baidu.com/aistudio/projectDetail/122290) ## 2. 效果说明 diff --git a/PaddleNLP/language_model/args.py b/PaddleNLP/language_model/args.py index eef0af99..8014bb52 100644 --- a/PaddleNLP/language_model/args.py +++ b/PaddleNLP/language_model/args.py @@ -60,10 +60,10 @@ def parse_args(): default=False, help='Whether profiling the trainning [True|False]') parser.add_argument( - '--use_py_reader', + '--use_dataloader', type=str2bool, default=False, - help='Whether using py_reader to feed data [True|False]') + help='Whether using dataloader to feed data [True|False]') parser.add_argument( '--log_path', help='path of the log file. If not set, logs are printed to console') @@ -72,6 +72,11 @@ def parse_args(): type=str, default="models", help='dir of the saved model.') + parser.add_argument( + '--init_from_pretrain_model', + type=str, + default=None, + help='dir to init model.') parser.add_argument('--enable_ce', action='store_true') parser.add_argument('--batch_size', type=int, default=0, help='batch size') parser.add_argument('--max_epoch', type=int, default=0, help='max epoch') diff --git a/PaddleNLP/language_model/run.sh b/PaddleNLP/language_model/run.sh index d836c4ac..851c8977 100644 --- a/PaddleNLP/language_model/run.sh +++ b/PaddleNLP/language_model/run.sh @@ -7,6 +7,7 @@ function run_train() { --data_path data/simple-examples/data/ \ --model_type small \ --use_gpu True + #--init_from_pretrain_model models/0/params } run_train diff --git a/PaddleNLP/language_model/train.py b/PaddleNLP/language_model/train.py index 169c5c70..0c3c3d8b 100644 --- a/PaddleNLP/language_model/train.py +++ b/PaddleNLP/language_model/train.py @@ -40,7 +40,7 @@ import os os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" from args import * -from models.model_check import check_cuda +from models.model_check import check_cuda, check_version from models.language_model import lm_model from config import RNNConfig import logging @@ -88,7 +88,10 @@ def save_para_npz(train_prog, train_exe): def main(): args = parse_args() + # check if set use_gpu=True in paddlepaddle cpu version check_cuda(args.use_gpu) + # check if paddlepaddle version is satisfied + check_version() logger = logging.getLogger("lm") logger.setLevel(logging.INFO) @@ -124,10 +127,10 @@ def main(): init_scale=config.init_scale, dropout=config.dropout, rnn_model=config.rnn_model, - use_py_reader=args.use_py_reader) + use_dataloader=args.use_dataloader) - if args.use_py_reader: - py_reader = res_vars[-1] + if args.use_dataloader: + dataloader = res_vars[-1] res_vars = res_vars[:-1] loss, last_hidden, last_cell, feed_order = res_vars @@ -159,7 +162,7 @@ def main(): init_scale=config.init_scale, dropout=config.dropout, rnn_model=config.rnn_model, - use_py_reader=False) + use_dataloader=False) # Some op behaves differently for train and inference, we need to call # this clone function to ensure every op is right for inference. inference_program = inference_program.clone(for_test=True) @@ -168,6 +171,15 @@ def main(): exe = Executor(place) exe.run(startup_program) + if args.init_from_pretrain_model: + if not os.path.exists(args.init_from_pretrain_model + '.pdparams'): + print(args.init_from_pretrain_model) + raise Warning("The pretrained params do not exist.") + return + fluid.load(main_program, args.init_from_pretrain_model) + print("finish initing model from pretrained params from %s" % + (args.init_from_pretrain_model)) + device_count = len(fluid.cuda_places()) if args.use_gpu else len( fluid.cpu_places()) @@ -176,8 +188,6 @@ def main(): exec_strategy.num_iteration_per_drop_scope = 100 build_strategy = fluid.BuildStrategy() - build_strategy.enable_inplace = True - build_strategy.memory_optimize = False build_strategy.fuse_all_optimizer_ops = True if args.parallel: @@ -282,7 +292,6 @@ def main(): epoch_id=epoch_id, with_lr=True, device_count=device_count) - batch_start_time = time.time() fetch_outs = exe.run(train_program, feed=input_data_feed, @@ -306,11 +315,10 @@ def main(): print( "-- Epoch:[%d]; Batch:[%d]; Time: %.5f s; ppl: %.5f, lr: %.5f" % (epoch_id, batch_id, batch_time, ppl[0], lr[0])) - ppl = np.exp(total_loss / iters) return ppl - def train_an_epoch_py_reader(epoch_id, batch_times): + def train_an_epoch_dataloader(epoch_id, batch_times): # get train epoch size log_interval = get_log_interval(len(train_data)) @@ -319,7 +327,7 @@ def main(): total_loss = 0 iters = 0 - py_reader.start() + dataloader.start() batch_id = 0 try: while True: @@ -361,14 +369,14 @@ def main(): batch_id += 1 except fluid.core.EOFException: - py_reader.reset() + dataloader.reset() batch_times.append(time.time() - batch_start_time) ppl = np.exp(total_loss / iters) return ppl def train(): - if args.use_py_reader: + if args.use_dataloader: def data_gen(): data_iter_size = config.batch_size // device_count @@ -380,14 +388,14 @@ def main(): y = y.reshape((-1, 1)) yield x, y - py_reader.decorate_tensor_provider(data_gen) + dataloader.set_batch_generator(data_gen) total_time = 0.0 for epoch_id in range(config.max_epoch): batch_times = [] epoch_start_time = time.time() - if args.use_py_reader: - train_ppl = train_an_epoch_py_reader(epoch_id, batch_times) + if args.use_dataloader: + train_ppl = train_an_epoch_dataloader(epoch_id, batch_times) else: train_ppl = train_an_epoch(epoch_id, batch_times) epoch_time = time.time() - epoch_start_time @@ -436,9 +444,9 @@ def main(): format( len(valid_data), config.batch_size, config.num_steps)) - save_model_dir = os.path.join(args.save_model_dir, str(epoch_id)) - fluid.io.save_persistables( - executor=exe, dirname=save_model_dir, main_program=main_program) + save_model_dir = os.path.join(args.save_model_dir, + str(epoch_id), "params") + fluid.save(main_program, save_model_dir) print("Saved model to: %s.\n" % save_model_dir) with profile_context(args.profile): diff --git a/PaddleNLP/models/language_model/lm_model.py b/PaddleNLP/models/language_model/lm_model.py index 731d8f5a..ff668b0c 100644 --- a/PaddleNLP/models/language_model/lm_model.py +++ b/PaddleNLP/models/language_model/lm_model.py @@ -32,7 +32,7 @@ def lm_model(hidden_size, init_scale=0.1, dropout=None, rnn_model='static', - use_py_reader=False): + use_dataloader=False): def padding_rnn(input_embedding, len=3, init_hidden=None, init_cell=None): weight_1_arr = [] weight_2_arr = [] @@ -255,34 +255,26 @@ def lm_model(hidden_size, return real_res, last_hidden, last_cell batch_size_each = batch_size // fluid.core.get_cuda_device_count() - if use_py_reader: - feed_shapes = [[batch_size_each, num_steps, 1], - [batch_size_each * num_steps, 1]] - py_reader = fluid.layers.py_reader( - capacity=16, shapes=feed_shapes, dtypes=['int64', 'int64']) - x, y = fluid.layers.read_file(py_reader) - else: - x = layers.data( - name="x", - shape=[batch_size_each, num_steps, 1], - dtype='int64', - append_batch_size=False) - y = layers.data( - name="y", - shape=[batch_size_each * num_steps, 1], - dtype='int64', - append_batch_size=False) - - init_hidden = layers.data( + x = fluid.data( + name="x", shape=[batch_size_each, num_steps, 1], dtype='int64') + y = fluid.data( + name="y", shape=[batch_size_each * num_steps, 1], dtype='int64') + + if use_dataloader: + dataloader = fluid.io.DataLoader.from_generator( + feed_list=[x, y], + capacity=16, + iterable=False, + use_double_buffer=True) + + init_hidden = fluid.data( name="init_hidden", shape=[num_layers, batch_size_each, hidden_size], - dtype='float32', - append_batch_size=False) - init_cell = layers.data( + dtype='float32') + init_cell = fluid.data( name="init_cell", shape=[num_layers, batch_size_each, hidden_size], - dtype='float32', - append_batch_size=False) + dtype='float32') init_cell.persistable = True init_hidden.persistable = True @@ -385,7 +377,7 @@ def lm_model(hidden_size, layers.assign(input=last_hidden, output=init_hidden) feeding_list = ['x', 'y', 'init_hidden', 'init_cell'] - if use_py_reader: - return loss, last_hidden, last_cell, feeding_list, py_reader + if use_dataloader: + return loss, last_hidden, last_cell, feeding_list, dataloader else: return loss, last_hidden, last_cell, feeding_list diff --git a/PaddleNLP/models/model_check.py b/PaddleNLP/models/model_check.py index af0612fd..9dd38485 100644 --- a/PaddleNLP/models/model_check.py +++ b/PaddleNLP/models/model_check.py @@ -22,6 +22,10 @@ def check_cuda(use_cuda, err = \ "\nYou can not set use_cuda = True in the model because you are using paddlepaddle-cpu.\n \ Please: 1. Install paddlepaddle-gpu to run your models on GPU or 2. Set use_cuda = False to run models on CPU.\n" ): + """ + Log error and exit when set use_gpu=true in paddlepaddle + cpu version. + """ try: if use_cuda == True and fluid.is_compiled_with_cuda() == False: print(err) @@ -30,6 +34,38 @@ def check_cuda(use_cuda, err = \ pass +def check_version(): + """ + Log error and exit when the installed version of paddlepaddle is + not satisfied. + """ + err = "PaddlePaddle version 1.6 or higher is required, " \ + "or a suitable develop version is satisfied as well. \n" \ + "Please make sure the version is good with your code." \ + + try: + fluid.require_version('1.6.0') + except Exception as e: + print(err) + sys.exit(1) + + +def check_version(): + """ + Log error and exit when the installed version of paddlepaddle is + not satisfied. + """ + err = "PaddlePaddle version 1.6 or higher is required, " \ + "or a suitable develop version is satisfied as well. \n" \ + "Please make sure the version is good with your code." \ + + try: + fluid.require_version('1.6.0') + except Exception as e: + print(err) + sys.exit(1) + + if __name__ == "__main__": check_cuda(True) diff --git a/dygraph/ptb_lm/args.py b/dygraph/ptb_lm/args.py index 294373bd..ad33ea1a 100644 --- a/dygraph/ptb_lm/args.py +++ b/dygraph/ptb_lm/args.py @@ -40,6 +40,16 @@ def parse_args(): parser.add_argument( '--log_path', help='path of the log file. If not set, logs are printed to console') + parser.add_argument( + '--save_model_dir', + type=str, + default="models", + help='dir of the saved model.') + parser.add_argument( + '--init_from_pretrain_model', + type=str, + default=None, + help='dir to init model.') parser.add_argument('--ce', action='store_true', help="run ce") args = parser.parse_args() return args diff --git a/dygraph/ptb_lm/model_check.py b/dygraph/ptb_lm/model_check.py new file mode 100644 index 00000000..106c28e6 --- /dev/null +++ b/dygraph/ptb_lm/model_check.py @@ -0,0 +1,58 @@ +#encoding=utf8 +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import paddle +import paddle.fluid as fluid + + +def check_cuda(use_cuda, err = \ + "\nYou can not set use_cuda = True in the model because you are using paddlepaddle-cpu.\n \ + Please: 1. Install paddlepaddle-gpu to run your models on GPU or 2. Set use_cuda = False to run models on CPU.\n" + ): + """ + Log error and exit when set use_gpu=true in paddlepaddle + cpu version. + """ + try: + if use_cuda == True and fluid.is_compiled_with_cuda() == False: + print(err) + sys.exit(1) + except Exception as e: + pass + + +def check_version(): + """ + Log error and exit when the installed version of paddlepaddle is + not satisfied. + """ + err = "PaddlePaddle version 1.6 or higher is required, " \ + "or a suitable develop version is satisfied as well. \n" \ + "Please make sure the version is good with your code." \ + + try: + fluid.require_version('1.6.0') + except Exception as e: + print(err) + sys.exit(1) + + +if __name__ == "__main__": + check_cuda(True) + + check_cuda(False) + + check_cuda(True, "This is only for testing.") diff --git a/dygraph/ptb_lm/ptb_dy.py b/dygraph/ptb_lm/ptb_dy.py index 0e1a1f00..858fb13b 100644 --- a/dygraph/ptb_lm/ptb_dy.py +++ b/dygraph/ptb_lm/ptb_dy.py @@ -14,6 +14,7 @@ from __future__ import print_function +import os import unittest import paddle.fluid as fluid import paddle.fluid.core as core @@ -25,6 +26,7 @@ import numpy as np import six import reader +import model_check import time from args import * @@ -200,7 +202,6 @@ class PtbModel(fluid.Layer): x_emb = self.embedding(input) - #print( self.x_emb.numpy() ) x_emb = fluid.layers.reshape( x_emb, shape=[-1, self.num_steps, self.hidden_size]) if self.dropout is not None and self.dropout > 0.0: @@ -211,7 +212,6 @@ class PtbModel(fluid.Layer): rnn_out, last_hidden, last_cell = self.simple_lstm_rnn(x_emb, init_h, init_c) - #print( "rnn_out", rnn_out.numpy() ) rnn_out = fluid.layers.reshape( rnn_out, shape=[-1, self.num_steps, self.hidden_size]) projection = fluid.layers.matmul(rnn_out, self.softmax_weight) @@ -228,14 +228,18 @@ class PtbModel(fluid.Layer): return loss, last_hidden, last_cell def debug_emb(self): - #print("1111", self.x_emb.gradient() ) np.save("emb_grad", self.x_emb.gradient()) def train_ptb_lm(): - args = parse_args() + + # check if set use_gpu=True in paddlepaddle cpu version + model_check.check_cuda(args.use_gpu) + # check if paddlepaddle version is satisfied + model_check.check_version() + model_type = args.model_type vocab_size = 10000 @@ -308,6 +312,15 @@ def train_ptb_lm(): init_scale=init_scale, dropout=dropout) + if args.init_from_pretrain_model: + if not os.path.exists(args.init_from_pretrain_model + '.pdparams'): + print(args.init_from_pretrain_model) + raise Warning("The pretrained params do not exist.") + return + fluid.load_dygraph(args.init_from_pretrain_model) + print("finish initing model from pretrained params from %s" % + (args.init_from_pretrain_model)) + dy_param_updated = dict() dy_param_init = dict() dy_loss = None @@ -409,15 +422,20 @@ def train_ptb_lm(): if batch_id > 0 and batch_id % log_interval == 0: ppl = np.exp(total_loss / iters) - print(epoch_id, "ppl ", batch_id, ppl[0], - sgd._global_learning_rate().numpy()) + print("-- Epoch:[%d]; Batch:[%d]; ppl: %.5f, lr: %.5f" % + (epoch_id, batch_id, ppl[0], + sgd._global_learning_rate().numpy())) print("one ecpoh finished", epoch_id) print("time cost ", time.time() - start_time) ppl = np.exp(total_loss / iters) - print("ppl ", epoch_id, ppl[0]) + print("-- Epoch:[%d]; ppl: %.5f" % (epoch_id, ppl[0])) if args.ce: print("kpis\ttrain_ppl\t%0.3f" % ppl[0]) + save_model_dir = os.path.join(args.save_model_dir, + str(epoch_id), 'params') + fluid.save_dygraph(ptb_model.state_dict(), save_model_dir) + print("Saved model to: %s.\n" % save_model_dir) eval(ptb_model, test_data) -- GitLab