From dba5be0f62b44abbb1b65534bd7201b3e94a42c6 Mon Sep 17 00:00:00 2001
From: zhengya01 <43601548+zhengya01@users.noreply.github.com>
Date: Fri, 21 Feb 2020 16:01:11 +0800
Subject: [PATCH] add ce for ptb_lm (#4328)

---
 dygraph/ptb_lm/ptb_dy.py | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/dygraph/ptb_lm/ptb_dy.py b/dygraph/ptb_lm/ptb_dy.py
index 86411a02..3ac11e05 100644
--- a/dygraph/ptb_lm/ptb_dy.py
+++ b/dygraph/ptb_lm/ptb_dy.py
@@ -220,6 +220,8 @@ def train_ptb_lm():
     if args.use_gpu == True:
         place = core.CUDAPlace(0)
 
+    dev_count = fluid.core.get_cuda_device_count()
+
     # check if paddlepaddle version is satisfied
     model_check.check_version()
 
@@ -363,9 +365,9 @@ def train_ptb_lm():
         print("eval finished")
         ppl = np.exp(total_loss / iters)
         print("ppl ", batch_id, ppl[0])
-        if args.ce:
-            print("kpis\ttest_ppl\t%0.3f" % ppl[0])
 
+    ce_time = []
+    ce_ppl = []
     grad_clip = fluid.dygraph_grad_clip.GradClipByGlobalNorm(max_grad_norm)
     for epoch_id in range(max_epoch):
         ptb_model.train()
@@ -412,6 +414,8 @@ def train_ptb_lm():
         print("one epoch finished", epoch_id)
         print("time cost ", time.time() - start_time)
         ppl = np.exp(total_loss / iters)
+        ce_time.append(time.time() - start_time)
+        ce_ppl.append(ppl[0])
         print("-- Epoch:[%d]; ppl: %.5f" % (epoch_id, ppl[0]))
 
         if batch_size <= 20 and epoch_id == 0 and ppl[0] > 1000:
@@ -421,8 +425,6 @@ def train_ptb_lm():
             print("Abort this training process and please start again.")
             return
 
-        if args.ce:
-            print("kpis\ttrain_ppl\t%0.3f" % ppl[0])
         save_model_dir = os.path.join(args.save_model_dir,
                                       str(epoch_id), 'params')
         fluid.save_dygraph(ptb_model.state_dict(), save_model_dir)
@@ -430,6 +432,17 @@ def train_ptb_lm():
 
         eval(ptb_model, valid_data)
 
+    if args.ce:
+        _ppl = 0
+        _time = 0
+        try:
+            _time = ce_time[-1]
+            _ppl = ce_ppl[-1]
+        except:
+            print("ce info error")
+        print("kpis\ttrain_duration_card%s\t%s" % (dev_count, _time))
+        print("kpis\ttrain_ppl_card%s\t%f" % (dev_count, _ppl))
+
     eval(ptb_model, test_data)
 
 train_ptb_lm()
--
GitLab
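
For context, the reporting block this patch adds reduces to a small pattern: collect per-epoch wall-clock time and perplexity during training, then emit tab-separated `kpis` lines keyed by GPU count for the continuous-evaluation (CE) harness to parse. The sketch below isolates that pattern as a minimal, runnable Python snippet; `emit_ce_kpis` and the sample epoch records are hypothetical stand-ins rather than code from the repository, and the patch's bare `except:` is narrowed here to `IndexError`, the only exception the last-element lookup can raise.

```python
def emit_ce_kpis(ce_time, ce_ppl, dev_count):
    # Mirror of the patch's CE block: report the final epoch's
    # duration and perplexity, falling back to 0 if no epoch
    # completed, then print "kpis\t<name>_card<n>\t<value>" lines
    # in the format the CE harness scrapes from stdout.
    _time = 0
    _ppl = 0
    try:
        _time = ce_time[-1]
        _ppl = ce_ppl[-1]
    except IndexError:
        print("ce info error")
    print("kpis\ttrain_duration_card%s\t%s" % (dev_count, _time))
    print("kpis\ttrain_ppl_card%s\t%f" % (dev_count, _ppl))


if __name__ == "__main__":
    # Hypothetical per-epoch records, appended the way the patched
    # training loop does (seconds elapsed, perplexity per epoch).
    ce_time = [12.3, 11.9]
    ce_ppl = [812.5, 640.2]
    emit_ce_kpis(ce_time, ce_ppl, dev_count=1)
```

Keying the KPI names by `dev_count` (e.g. `train_ppl_card1`) lets the harness track single-GPU and multi-GPU runs as separate metric series, which is why the patch also adds the `fluid.core.get_cuda_device_count()` call near the top of `train_ptb_lm`.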