From 9e5cfbf877c4443f3fe3b18c90d35ba76af1c6f3 Mon Sep 17 00:00:00 2001
From: Li Fuchen <lfchener@outlook.com>
Date: Fri, 25 Oct 2019 17:02:53 +0800
Subject: [PATCH] Cherry pick from 1.6 to develop (#3765)

* Use new save/load api and change fluid.layers.data to fluid.data (#3547)

* Use new save/load api and change fluid.layers.data to fluid.data

* add check for paddle version

* Use new save/load api in ptb_lm (#3546)

* Use new save/load api in ptb_lm

* add check for paddle version

* unify reader to dataloader (#3488)

unify reader to dataloader

* Use new save/load api and change fluid.layers.data to fluid.data (#3547)

* Use new save/load api and change fluid.layers.data to fluid.data

* add check for paddle version
---
 AutoDL                                      |  1 -
 PaddleNLP/language_model/README.md          |  2 +
 PaddleNLP/language_model/args.py            |  9 +++-
 PaddleNLP/language_model/run.sh             |  1 +
 PaddleNLP/language_model/train.py           | 46 +++++++++-------
 PaddleNLP/models/language_model/lm_model.py | 46 +++++++---------
 PaddleNLP/models/model_check.py             | 36 +++++++++++++
 dygraph/ptb_lm/args.py                      | 10 ++++
 dygraph/ptb_lm/model_check.py               | 58 +++++++++++++++++++++
 dygraph/ptb_lm/ptb_dy.py                    | 32 +++++++++---
 10 files changed, 185 insertions(+), 56 deletions(-)
 delete mode 160000 AutoDL
 create mode 100644 dygraph/ptb_lm/model_check.py

diff --git a/AutoDL b/AutoDL
deleted file mode 160000
index 5447bcf7..00000000
--- a/AutoDL
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 5447bcf72d92b58e9efe38e4aa0d47bab68bec31
diff --git a/PaddleNLP/language_model/README.md b/PaddleNLP/language_model/README.md
index 6a989d49..10b882a7 100644
--- a/PaddleNLP/language_model/README.md
+++ b/PaddleNLP/language_model/README.md
@@ -5,6 +5,8 @@
 ## 1. 任务说明
 本文主要介绍基于lstm的语言的模型的实现，给定一个输入词序列（中文分词、英文tokenize），计算其ppl（语言模型困惑度，用户表示句子的流利程度），基于循环神经网络语言模型的介绍可以[参阅论文](https://arxiv.org/abs/1409.2329)。相对于传统的方法，基于循环神经网络的方法能够更好的解决稀疏词的问题。
 
+**目前语言模型要求使用PaddlePaddle 1.6及以上版本或适当的develop版本。**
+
 同时推荐用户参考[IPython Notebook demo](https://aistudio.baidu.com/aistudio/projectDetail/122290)
 
 ## 2. 效果说明
diff --git a/PaddleNLP/language_model/args.py b/PaddleNLP/language_model/args.py
index eef0af99..8014bb52 100644
--- a/PaddleNLP/language_model/args.py
+++ b/PaddleNLP/language_model/args.py
@@ -60,10 +60,10 @@ def parse_args():
         default=False,
         help='Whether profiling the trainning [True|False]')
     parser.add_argument(
-        '--use_py_reader',
+        '--use_dataloader',
         type=str2bool,
         default=False,
-        help='Whether using py_reader to feed data [True|False]')
+        help='Whether using dataloader to feed data [True|False]')
     parser.add_argument(
         '--log_path',
         help='path of the log file. If not set, logs are printed to console')
@@ -72,6 +72,11 @@ def parse_args():
         type=str,
         default="models",
         help='dir of the saved model.')
+    parser.add_argument(
+        '--init_from_pretrain_model',
+        type=str,
+        default=None,
+        help='dir to init model.')
     parser.add_argument('--enable_ce', action='store_true')
     parser.add_argument('--batch_size', type=int, default=0, help='batch size')
     parser.add_argument('--max_epoch', type=int, default=0, help='max epoch')
diff --git a/PaddleNLP/language_model/run.sh b/PaddleNLP/language_model/run.sh
index d836c4ac..851c8977 100644
--- a/PaddleNLP/language_model/run.sh
+++ b/PaddleNLP/language_model/run.sh
@@ -7,6 +7,7 @@ function run_train() {
         --data_path data/simple-examples/data/ \
         --model_type small \
         --use_gpu True
+        #--init_from_pretrain_model models/0/params
 }
 
 run_train
diff --git a/PaddleNLP/language_model/train.py b/PaddleNLP/language_model/train.py
index 169c5c70..0c3c3d8b 100644
--- a/PaddleNLP/language_model/train.py
+++ b/PaddleNLP/language_model/train.py
@@ -40,7 +40,7 @@ import os
 os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
 
 from args import *
-from models.model_check import check_cuda
+from models.model_check import check_cuda, check_version
 from models.language_model import lm_model
 from config import RNNConfig
 import logging
@@ -88,7 +88,10 @@ def save_para_npz(train_prog, train_exe):
 def main():
     args = parse_args()
 
+    # check if set use_gpu=True in paddlepaddle cpu version
     check_cuda(args.use_gpu)
+    # check if paddlepaddle version is satisfied
+    check_version()
 
     logger = logging.getLogger("lm")
     logger.setLevel(logging.INFO)
@@ -124,10 +127,10 @@ def main():
                 init_scale=config.init_scale,
                 dropout=config.dropout,
                 rnn_model=config.rnn_model,
-                use_py_reader=args.use_py_reader)
+                use_dataloader=args.use_dataloader)
 
-            if args.use_py_reader:
-                py_reader = res_vars[-1]
+            if args.use_dataloader:
+                dataloader = res_vars[-1]
                 res_vars = res_vars[:-1]
             loss, last_hidden, last_cell, feed_order = res_vars
 
@@ -159,7 +162,7 @@ def main():
                 init_scale=config.init_scale,
                 dropout=config.dropout,
                 rnn_model=config.rnn_model,
-                use_py_reader=False)
+                use_dataloader=False)
     # Some op behaves differently for train and inference, we need to call
     # this clone function to ensure every op is right for inference.
     inference_program = inference_program.clone(for_test=True)
@@ -168,6 +171,15 @@ def main():
     exe = Executor(place)
     exe.run(startup_program)
 
+    if args.init_from_pretrain_model:
+        # The argument is a path prefix; params are expected at "<prefix>.pdparams".
+        if not os.path.exists(args.init_from_pretrain_model + '.pdparams'):
+            print(args.init_from_pretrain_model)
+            raise Warning("The pretrained params do not exist.")
+        fluid.load(main_program, args.init_from_pretrain_model)
+        print("finish initing model from pretrained params from %s" %
+              (args.init_from_pretrain_model))
+
     device_count = len(fluid.cuda_places()) if args.use_gpu else len(
         fluid.cpu_places())
 
@@ -176,8 +188,6 @@ def main():
     exec_strategy.num_iteration_per_drop_scope = 100
 
     build_strategy = fluid.BuildStrategy()
-    build_strategy.enable_inplace = True
-    build_strategy.memory_optimize = False
     build_strategy.fuse_all_optimizer_ops = True
 
     if args.parallel:
@@ -282,7 +292,6 @@ def main():
                 epoch_id=epoch_id,
                 with_lr=True,
                 device_count=device_count)
-
             batch_start_time = time.time()
             fetch_outs = exe.run(train_program,
                                  feed=input_data_feed,
@@ -306,11 +315,10 @@ def main():
                 print(
                     "-- Epoch:[%d]; Batch:[%d]; Time: %.5f s; ppl: %.5f, lr: %.5f"
                     % (epoch_id, batch_id, batch_time, ppl[0], lr[0]))
-
         ppl = np.exp(total_loss / iters)
         return ppl
 
-    def train_an_epoch_py_reader(epoch_id, batch_times):
+    def train_an_epoch_dataloader(epoch_id, batch_times):
         # get train epoch size
         log_interval = get_log_interval(len(train_data))
 
@@ -319,7 +327,7 @@ def main():
         total_loss = 0
         iters = 0
 
-        py_reader.start()
+        dataloader.start()
         batch_id = 0
         try:
             while True:
@@ -361,14 +369,14 @@ def main():
 
                 batch_id += 1
         except fluid.core.EOFException:
-            py_reader.reset()
+            dataloader.reset()
 
         batch_times.append(time.time() - batch_start_time)
         ppl = np.exp(total_loss / iters)
         return ppl
 
     def train():
-        if args.use_py_reader:
+        if args.use_dataloader:
 
             def data_gen():
                 data_iter_size = config.batch_size // device_count
@@ -380,14 +388,14 @@ def main():
                     y = y.reshape((-1, 1))
                     yield x, y
 
-            py_reader.decorate_tensor_provider(data_gen)
+            dataloader.set_batch_generator(data_gen)
 
         total_time = 0.0
         for epoch_id in range(config.max_epoch):
             batch_times = []
             epoch_start_time = time.time()
-            if args.use_py_reader:
-                train_ppl = train_an_epoch_py_reader(epoch_id, batch_times)
+            if args.use_dataloader:
+                train_ppl = train_an_epoch_dataloader(epoch_id, batch_times)
             else:
                 train_ppl = train_an_epoch(epoch_id, batch_times)
             epoch_time = time.time() - epoch_start_time
@@ -436,9 +444,9 @@ def main():
                     format(
                         len(valid_data), config.batch_size, config.num_steps))
 
-            save_model_dir = os.path.join(args.save_model_dir, str(epoch_id))
-            fluid.io.save_persistables(
-                executor=exe, dirname=save_model_dir, main_program=main_program)
+            save_model_dir = os.path.join(args.save_model_dir,
+                                          str(epoch_id), "params")
+            fluid.save(main_program, save_model_dir)
             print("Saved model to: %s.\n" % save_model_dir)
 
     with profile_context(args.profile):
diff --git a/PaddleNLP/models/language_model/lm_model.py b/PaddleNLP/models/language_model/lm_model.py
index 731d8f5a..ff668b0c 100644
--- a/PaddleNLP/models/language_model/lm_model.py
+++ b/PaddleNLP/models/language_model/lm_model.py
@@ -32,7 +32,7 @@ def lm_model(hidden_size,
              init_scale=0.1,
              dropout=None,
              rnn_model='static',
-             use_py_reader=False):
+             use_dataloader=False):
     def padding_rnn(input_embedding, len=3, init_hidden=None, init_cell=None):
         weight_1_arr = []
         weight_2_arr = []
@@ -255,34 +255,26 @@ def lm_model(hidden_size,
         return real_res, last_hidden, last_cell
 
     batch_size_each = batch_size // fluid.core.get_cuda_device_count()
-    if use_py_reader:
-        feed_shapes = [[batch_size_each, num_steps, 1],
-                       [batch_size_each * num_steps, 1]]
-        py_reader = fluid.layers.py_reader(
-            capacity=16, shapes=feed_shapes, dtypes=['int64', 'int64'])
-        x, y = fluid.layers.read_file(py_reader)
-    else:
-        x = layers.data(
-            name="x",
-            shape=[batch_size_each, num_steps, 1],
-            dtype='int64',
-            append_batch_size=False)
-        y = layers.data(
-            name="y",
-            shape=[batch_size_each * num_steps, 1],
-            dtype='int64',
-            append_batch_size=False)
-
-    init_hidden = layers.data(
+    x = fluid.data(
+        name="x", shape=[batch_size_each, num_steps, 1], dtype='int64')
+    y = fluid.data(
+        name="y", shape=[batch_size_each * num_steps, 1], dtype='int64')
+
+    if use_dataloader:
+        dataloader = fluid.io.DataLoader.from_generator(
+            feed_list=[x, y],
+            capacity=16,
+            iterable=False,
+            use_double_buffer=True)
+
+    init_hidden = fluid.data(
         name="init_hidden",
         shape=[num_layers, batch_size_each, hidden_size],
-        dtype='float32',
-        append_batch_size=False)
-    init_cell = layers.data(
+        dtype='float32')
+    init_cell = fluid.data(
         name="init_cell",
         shape=[num_layers, batch_size_each, hidden_size],
-        dtype='float32',
-        append_batch_size=False)
+        dtype='float32')
 
     init_cell.persistable = True
     init_hidden.persistable = True
@@ -385,7 +377,7 @@ def lm_model(hidden_size,
     layers.assign(input=last_hidden, output=init_hidden)
 
     feeding_list = ['x', 'y', 'init_hidden', 'init_cell']
-    if use_py_reader:
-        return loss, last_hidden, last_cell, feeding_list, py_reader
+    if use_dataloader:
+        return loss, last_hidden, last_cell, feeding_list, dataloader
     else:
         return loss, last_hidden, last_cell, feeding_list
diff --git a/PaddleNLP/models/model_check.py b/PaddleNLP/models/model_check.py
index af0612fd..9dd38485 100644
--- a/PaddleNLP/models/model_check.py
+++ b/PaddleNLP/models/model_check.py
@@ -22,6 +22,10 @@ def check_cuda(use_cuda, err = \
     "\nYou can not set use_cuda = True in the model because you are using paddlepaddle-cpu.\n \
     Please: 1. Install paddlepaddle-gpu to run your models on GPU or 2. Set use_cuda = False to run models on CPU.\n"
                                                                                                                      ):
+    """
+    Log error and exit when set use_gpu=true in paddlepaddle
+    cpu version.
+    """
     try:
         if use_cuda == True and fluid.is_compiled_with_cuda() == False:
             print(err)
@@ -30,6 +34,22 @@ def check_cuda(use_cuda, err = \
         pass
 
 
+def check_version():
+    """
+    Print an error and exit with status 1 unless the installed
+    paddlepaddle satisfies fluid.require_version('1.6.0').
+    """
+    err = ("PaddlePaddle version 1.6 or higher is required, "
+           "or a suitable develop version is satisfied as well. \n"
+           "Please make sure the version is good with your code.")
+
+    try:
+        fluid.require_version('1.6.0')
+    except Exception:
+        print(err)
+        sys.exit(1)
+
+
 if __name__ == "__main__":
     check_cuda(True)
 
diff --git a/dygraph/ptb_lm/args.py b/dygraph/ptb_lm/args.py
index 294373bd..ad33ea1a 100644
--- a/dygraph/ptb_lm/args.py
+++ b/dygraph/ptb_lm/args.py
@@ -40,6 +40,16 @@ def parse_args():
     parser.add_argument(
         '--log_path',
         help='path of the log file. If not set, logs are printed to console')
+    parser.add_argument(
+        '--save_model_dir',
+        type=str,
+        default="models",
+        help='dir of the saved model.')
+    parser.add_argument(
+        '--init_from_pretrain_model',
+        type=str,
+        default=None,
+        help='dir to init model.')
     parser.add_argument('--ce', action='store_true', help="run ce")
     args = parser.parse_args()
     return args
diff --git a/dygraph/ptb_lm/model_check.py b/dygraph/ptb_lm/model_check.py
new file mode 100644
index 00000000..106c28e6
--- /dev/null
+++ b/dygraph/ptb_lm/model_check.py
@@ -0,0 +1,58 @@
+#encoding=utf8
+#   Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import paddle
+import paddle.fluid as fluid
+
+
+def check_cuda(use_cuda, err = \
+    "\nYou can not set use_cuda = True in the model because you are using paddlepaddle-cpu.\n \
+    Please: 1. Install paddlepaddle-gpu to run your models on GPU or 2. Set use_cuda = False to run models on CPU.\n"
+                                                                                                                     ):
+    """
+    Print ``err`` and exit with status 1 when ``use_cuda == True`` but
+    fluid.is_compiled_with_cuda() is False; errors from the check are ignored.
+    """
+    try:
+        if use_cuda == True and fluid.is_compiled_with_cuda() == False:
+            print(err)
+            sys.exit(1)
+    except Exception as e:
+        pass
+
+
+def check_version():
+    """
+    Print an error and exit with status 1 unless the installed
+    paddlepaddle satisfies fluid.require_version('1.6.0').
+    """
+    err = ("PaddlePaddle version 1.6 or higher is required, "
+           "or a suitable develop version is satisfied as well. \n"
+           "Please make sure the version is good with your code.")
+
+    try:
+        fluid.require_version('1.6.0')
+    except Exception:
+        print(err)
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    # Manual smoke test. If fluid.is_compiled_with_cuda() returns False the
+    # first call prints the error and exits, so the later calls never run.
+    check_cuda(True)
+    check_cuda(False)
+    check_cuda(True, "This is only for testing.")
diff --git a/dygraph/ptb_lm/ptb_dy.py b/dygraph/ptb_lm/ptb_dy.py
index 0e1a1f00..858fb13b 100644
--- a/dygraph/ptb_lm/ptb_dy.py
+++ b/dygraph/ptb_lm/ptb_dy.py
@@ -14,6 +14,7 @@
 
 from __future__ import print_function
 
+import os
 import unittest
 import paddle.fluid as fluid
 import paddle.fluid.core as core
@@ -25,6 +26,7 @@ import numpy as np
 import six
 
 import reader
+import model_check
 import time
 
 from args import *
@@ -200,7 +202,6 @@ class PtbModel(fluid.Layer):
 
         x_emb = self.embedding(input)
 
-        #print( self.x_emb.numpy() )
         x_emb = fluid.layers.reshape(
             x_emb, shape=[-1, self.num_steps, self.hidden_size])
         if self.dropout is not None and self.dropout > 0.0:
@@ -211,7 +212,6 @@ class PtbModel(fluid.Layer):
         rnn_out, last_hidden, last_cell = self.simple_lstm_rnn(x_emb, init_h,
                                                                init_c)
 
-        #print( "rnn_out", rnn_out.numpy() )
         rnn_out = fluid.layers.reshape(
             rnn_out, shape=[-1, self.num_steps, self.hidden_size])
         projection = fluid.layers.matmul(rnn_out, self.softmax_weight)
@@ -228,14 +228,18 @@ class PtbModel(fluid.Layer):
         return loss, last_hidden, last_cell
 
     def debug_emb(self):
-        #print("1111", self.x_emb.gradient() )
 
         np.save("emb_grad", self.x_emb.gradient())
 
 
 def train_ptb_lm():
-
     args = parse_args()
+
+    # check if set use_gpu=True in paddlepaddle cpu version
+    model_check.check_cuda(args.use_gpu)
+    # check if paddlepaddle version is satisfied
+    model_check.check_version()
+
     model_type = args.model_type
 
     vocab_size = 10000
@@ -308,6 +312,15 @@ def train_ptb_lm():
             init_scale=init_scale,
             dropout=dropout)
 
+        if args.init_from_pretrain_model:
+            if not os.path.exists(args.init_from_pretrain_model + '.pdparams'):
+                print(args.init_from_pretrain_model)
+                raise Warning("The pretrained params do not exist.")
+            param_dict, _ = fluid.load_dygraph(args.init_from_pretrain_model)
+            ptb_model.set_dict(param_dict)  # load_dygraph only returns the dicts
+            print("finish initing model from pretrained params from %s" %
+                  (args.init_from_pretrain_model))
+
         dy_param_updated = dict()
         dy_param_init = dict()
         dy_loss = None
@@ -409,15 +422,20 @@ def train_ptb_lm():
 
                 if batch_id > 0 and batch_id % log_interval == 0:
                     ppl = np.exp(total_loss / iters)
-                    print(epoch_id, "ppl ", batch_id, ppl[0],
-                          sgd._global_learning_rate().numpy())
+                    print("-- Epoch:[%d]; Batch:[%d]; ppl: %.5f, lr: %.5f" %
+                          (epoch_id, batch_id, ppl[0],
+                           sgd._global_learning_rate().numpy()))
 
             print("one ecpoh finished", epoch_id)
             print("time cost ", time.time() - start_time)
             ppl = np.exp(total_loss / iters)
-            print("ppl ", epoch_id, ppl[0])
+            print("-- Epoch:[%d]; ppl: %.5f" % (epoch_id, ppl[0]))
             if args.ce:
                 print("kpis\ttrain_ppl\t%0.3f" % ppl[0])
+            save_model_dir = os.path.join(args.save_model_dir,
+                                          str(epoch_id), 'params')
+            fluid.save_dygraph(ptb_model.state_dict(), save_model_dir)
+            print("Saved model to: %s.\n" % save_model_dir)
 
         eval(ptb_model, test_data)
 
-- 
GitLab