From f3a6dbbc36293da91bd0ded5ef12b374939700b5 Mon Sep 17 00:00:00 2001
From: Li Fuchen
Date: Wed, 16 Oct 2019 17:16:00 +0800
Subject: [PATCH] Use new save/load api in ptb_lm (#3546)

* Use new save/load api in ptb_lm

* add check for paddle version
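Note: a minimal sketch of the save/load round trip this patch adopts, for
reviewers unfamiliar with the 1.6 dygraph API. The tiny Embedding layer and
the "models/0/params" prefix below are illustrative only, not part of this
change:

    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        # any dygraph Layer works here; a small Embedding keeps it short
        model = fluid.dygraph.Embedding(size=[10, 3])

        # save_dygraph writes the state dict to <prefix>.pdparams
        fluid.save_dygraph(model.state_dict(), "models/0/params")

        # load_dygraph returns (param_dict, optimizer_dict);
        # set_dict applies the loaded params back onto the Layer
        param_dict, _ = fluid.load_dygraph("models/0/params")
        model.set_dict(param_dict)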
---
 dygraph/ptb_lm/args.py        | 10 ++++++
 dygraph/ptb_lm/model_check.py | 58 +++++++++++++++++++++++++++++++++++
 dygraph/ptb_lm/ptb_dy.py      | 32 ++++++++++++++-----
 3 files changed, 93 insertions(+), 7 deletions(-)
 create mode 100644 dygraph/ptb_lm/model_check.py

diff --git a/dygraph/ptb_lm/args.py b/dygraph/ptb_lm/args.py
index 294373bd..ad33ea1a 100644
--- a/dygraph/ptb_lm/args.py
+++ b/dygraph/ptb_lm/args.py
@@ -40,6 +40,16 @@ def parse_args():
     parser.add_argument(
         '--log_path',
         help='path of the log file. If not set, logs are printed to console')
+    parser.add_argument(
+        '--save_model_dir',
+        type=str,
+        default="models",
+        help='directory in which to save the model.')
+    parser.add_argument(
+        '--init_from_pretrain_model',
+        type=str,
+        default=None,
+        help='path prefix of pretrained params to initialize the model from.')
     parser.add_argument('--ce', action='store_true', help="run ce")
     args = parser.parse_args()
     return args
diff --git a/dygraph/ptb_lm/model_check.py b/dygraph/ptb_lm/model_check.py
new file mode 100644
index 00000000..106c28e6
--- /dev/null
+++ b/dygraph/ptb_lm/model_check.py
@@ -0,0 +1,58 @@
+#encoding=utf8
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import paddle
+import paddle.fluid as fluid
+
+
+def check_cuda(use_cuda, err = \
+    "\nYou cannot set use_cuda = True in the model because you are using paddlepaddle-cpu.\n \
+    Please: 1. Install paddlepaddle-gpu to run your models on GPU or 2. Set use_cuda = False to run models on CPU.\n"
+              ):
+    """
+    Log an error and exit when use_cuda is set to True under a
+    CPU-only PaddlePaddle installation.
+    """
+    try:
+        if use_cuda and not fluid.is_compiled_with_cuda():
+            print(err)
+            sys.exit(1)
+    except Exception:
+        pass
+
+
+def check_version():
+    """
+    Log an error and exit when the installed version of PaddlePaddle
+    does not satisfy the requirement.
+    """
+    err = "PaddlePaddle version 1.6 or higher is required, " \
+          "or a suitable develop version. \n" \
+          "Please make sure your installed version matches the code."
+
+    try:
+        fluid.require_version('1.6.0')
+    except Exception:
+        print(err)
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    check_cuda(True)
+
+    check_cuda(False)
+
+    check_cuda(True, "This is only for testing.")
diff --git a/dygraph/ptb_lm/ptb_dy.py b/dygraph/ptb_lm/ptb_dy.py
index 0e1a1f00..858fb13b 100644
--- a/dygraph/ptb_lm/ptb_dy.py
+++ b/dygraph/ptb_lm/ptb_dy.py
@@ -14,6 +14,7 @@
 
 from __future__ import print_function
 
+import os
 import unittest
 import paddle.fluid as fluid
 import paddle.fluid.core as core
@@ -25,6 +26,7 @@
 import numpy as np
 import six
 import reader
+import model_check
 import time
 
 from args import *
@@ -200,7 +202,6 @@
 
         x_emb = self.embedding(input)
 
-        #print( self.x_emb.numpy() )
         x_emb = fluid.layers.reshape(
             x_emb, shape=[-1, self.num_steps, self.hidden_size])
         if self.dropout is not None and self.dropout > 0.0:
@@ -211,7 +212,6 @@
         rnn_out, last_hidden, last_cell = self.simple_lstm_rnn(x_emb, init_h,
                                                                init_c)
-        #print( "rnn_out", rnn_out.numpy() )
         rnn_out = fluid.layers.reshape(
             rnn_out, shape=[-1, self.num_steps, self.hidden_size])
         projection = fluid.layers.matmul(rnn_out, self.softmax_weight)
@@ -228,14 +228,18 @@
         return loss, last_hidden, last_cell
 
     def debug_emb(self):
-        #print("1111", self.x_emb.gradient() )
         np.save("emb_grad", self.x_emb.gradient())
 
 
 def train_ptb_lm():
-    args = parse_args()
+
+    # check whether use_gpu=True was requested on a CPU-only build
+    model_check.check_cuda(args.use_gpu)
+    # check that the installed paddlepaddle version is sufficient
+    model_check.check_version()
+
     model_type = args.model_type
     vocab_size = 10000
@@ -308,6 +312,15 @@
             init_scale=init_scale,
             dropout=dropout)
 
+    if args.init_from_pretrain_model:
+        if not os.path.exists(args.init_from_pretrain_model + '.pdparams'):
+            raise ValueError("Pretrained params do not exist: %s.pdparams" %
+                             args.init_from_pretrain_model)
+        param_dict, _ = fluid.load_dygraph(args.init_from_pretrain_model)
+        ptb_model.set_dict(param_dict)
+        print("Finished initializing the model from pretrained params at %s" %
+              args.init_from_pretrain_model)
+
     dy_param_updated = dict()
     dy_param_init = dict()
     dy_loss = None
@@ -409,15 +422,20 @@
 
             if batch_id > 0 and batch_id % log_interval == 0:
                 ppl = np.exp(total_loss / iters)
-                print(epoch_id, "ppl ", batch_id, ppl[0],
-                      sgd._global_learning_rate().numpy())
+                print("-- Epoch:[%d]; Batch:[%d]; ppl: %.5f, lr: %.5f" %
+                      (epoch_id, batch_id, ppl[0],
+                       sgd._global_learning_rate().numpy()))
 
         print("one ecpoh finished", epoch_id)
         print("time cost ", time.time() - start_time)
         ppl = np.exp(total_loss / iters)
-        print("ppl ", epoch_id, ppl[0])
+        print("-- Epoch:[%d]; ppl: %.5f" % (epoch_id, ppl[0]))
         if args.ce:
             print("kpis\ttrain_ppl\t%0.3f" % ppl[0])
+        save_model_dir = os.path.join(args.save_model_dir,
+                                      str(epoch_id), 'params')
+        fluid.save_dygraph(ptb_model.state_dict(), save_model_dir)
+        print("Saved model to: %s.\n" % save_model_dir)
 
     eval(ptb_model, test_data)
-- 
GitLab