Enhance the training process of rnn_search (#3168)

* Refine the printed output and add timing for each step and epoch.
* Enable profiling.

Enhance the training process of rnn_search (#3168)
* Refine the printed output and add timing for each step and epoch.
* Enable profiling.
e99b607e · Yiqun Liu · GitHub · 2ef93ad2 · e99b607e · e99b607e
2 changed files
--- a/PaddleNLP/unarchived/neural_machine_translation/rnn_search/args.py
+++ b/PaddleNLP/unarchived/neural_machine_translation/rnn_search/args.py
@@ -119,5 +119,15 @@ def parse_args():
        help="The flag indicating whether to run the task "
        "for continuous evaluation.")
+    parser.add_argument(
+        "--parallel",
+        action='store_true',
+        help="Whether execute with the data_parallel mode.")
+    parser.add_argument(
+        "--profile",
+        action='store_true',
+        help="Whether enable the profile.")
    args = parser.parse_args()
    return args
--- a/PaddleNLP/unarchived/neural_machine_translation/rnn_search/train.py
+++ b/PaddleNLP/unarchived/neural_machine_translation/rnn_search/train.py
@@ -20,12 +20,13 @@ import numpy as np
 import time
 import os
 import random
 import math
+import contextlib
 import paddle
 import paddle.fluid as fluid
 import paddle.fluid.framework as framework
+import paddle.fluid.profiler as profiler
 from paddle.fluid.executor import Executor
 import reader
@@ -45,7 +46,16 @@ import pickle
 SEED = 123
-def train():
+@contextlib.contextmanager
+def profile_context(profile=True):
+    if profile:
+        with profiler.profiler('All', 'total', 'seq2seq.profile'):
+            yield
+    else:
+        yield
+def main():
    args = parse_args()
    num_layers = args.num_layers
@@ -106,6 +116,29 @@ def train():
    exe = Executor(place)
    exe.run(framework.default_startup_program())
+    device_count = len(fluid.cuda_places()) if args.use_gpu else len(
+        fluid.cpu_places())
+    if device_count > 1:
+        raise Exception("Training using multi-GPUs is not supported now.")
+    exec_strategy = fluid.ExecutionStrategy()
+    exec_strategy.num_threads = device_count
+    exec_strategy.num_iteration_per_drop_scope = 100
+    build_strategy = fluid.BuildStrategy()
+    build_strategy.enable_inplace = True
+    build_strategy.memory_optimize = False
+#    build_strategy.fuse_all_optimizer_ops = True
+    if args.parallel:
+        train_program = fluid.compiler.CompiledProgram(
+            framework.default_main_program()).with_data_parallel(
+                loss_name=loss.name,
+                build_strategy=build_strategy,
+                exec_strategy=exec_strategy)
+    else:
+        train_program = framework.default_main_program()
    train_data_prefix = args.train_data_prefix
    eval_data_prefix = args.eval_data_prefix
    test_data_prefix = args.test_data_prefix
@@ -160,63 +193,78 @@ def train():
        return ppl
-    ce_time = []
+    def train():
-    ce_ppl = []
+        ce_time = []
-    max_epoch = args.max_epoch
+        ce_ppl = []
-    for epoch_id in range(max_epoch):
+        max_epoch = args.max_epoch
-        start_time = time.time()
+        for epoch_id in range(max_epoch):
-        print("epoch id", epoch_id)
+            start_time = time.time()
-        if args.enable_ce:
+            if args.enable_ce:
-            train_data_iter = reader.get_data_iter(train_data, batch_size, enable_ce=True)
+                train_data_iter = reader.get_data_iter(train_data, batch_size, enable_ce=True)
-        else:
+            else:
-            train_data_iter = reader.get_data_iter(train_data, batch_size)
+                train_data_iter = reader.get_data_iter(train_data, batch_size)
+            total_loss = 0
+            word_count = 0.0
+            batch_times = []
+            for batch_id, batch in enumerate(train_data_iter):
+                batch_start_time = time.time()
+                input_data_feed, word_num = prepare_input(batch, epoch_id=epoch_id)
+                fetch_outs = exe.run(program=train_program,
+                                     feed=input_data_feed,
+                                     fetch_list=[loss.name],
+                                     use_program_cache=True)
+                cost_train = np.array(fetch_outs[0])
+                total_loss += cost_train * batch_size
+                word_count += word_num
+                batch_end_time = time.time()
+                batch_time = batch_end_time - batch_start_time
+                batch_times.append(batch_time)
+                if batch_id > 0 and batch_id % 100 == 0:
+                    print(
+                        "-- Epoch:[%d]; Batch:[%d]; Time: %.5f s; ppl: %.5f"
+                        % (epoch_id, batch_id, batch_time, np.exp(total_loss / word_count)))
+                    ce_ppl.append(np.exp(total_loss / word_count))
+                    total_loss = 0.0
+                    word_count = 0.0
+            end_time = time.time()
+            epoch_time = end_time - start_time
+            ce_time.append(epoch_time)
+            print(
+                "\nTrain epoch:[%d]; Epoch Time: %.5f; avg_time: %.5f s/step\n"
+                % (epoch_id, epoch_time, sum(batch_times) / len(batch_times)))
+            if not args.profile:
+                dir_name = args.model_path + "/epoch_" + str(epoch_id)
+                print("begin to save", dir_name)
+                fluid.io.save_params(exe, dir_name)
+                print("save finished")
+                dev_ppl = eval(valid_data)
+                print("dev ppl", dev_ppl)
+                test_ppl = eval(test_data)
+                print("test ppl", test_ppl)
-        total_loss = 0
+        if args.enable_ce:
-        word_count = 0.0
+            card_num = get_cards()
-        for batch_id, batch in enumerate(train_data_iter):
+            _ppl = 0
+            _time = 0
-            input_data_feed, word_num = prepare_input(batch, epoch_id=epoch_id)
+            try:
-            fetch_outs = exe.run(feed=input_data_feed,
+                _time = ce_time[-1]
-                                 fetch_list=[loss.name],
+                _ppl = ce_ppl[-1]
-                                 use_program_cache=True)
+            except:
+                print("ce info error")
-            cost_train = np.array(fetch_outs[0])
+            print("kpis\ttrain_duration_card%s\t%s" %
+                    (card_num, _time))
-            total_loss += cost_train * batch_size
+            print("kpis\ttrain_ppl_card%s\t%f" %
-            word_count += word_num
+                (card_num, _ppl))
-            if batch_id > 0 and batch_id % 100 == 0:
+    with profile_context(args.profile):
-                print("ppl", batch_id, np.exp(total_loss / word_count))
+        train()
-                ce_ppl.append(np.exp(total_loss / word_count))
-                total_loss = 0.0
-                word_count = 0.0
-        end_time = time.time()
-        time_gap = end_time - start_time
-        ce_time.append(time_gap)
-        dir_name = args.model_path + "/epoch_" + str(epoch_id)
-        print("begin to save", dir_name)
-        fluid.io.save_params(exe, dir_name)
-        print("save finished")
-        dev_ppl = eval(valid_data)
-        print("dev ppl", dev_ppl)
-        test_ppl = eval(test_data)
-        print("test ppl", test_ppl)
-    if args.enable_ce:
-        card_num = get_cards()
-        _ppl = 0
-        _time = 0
-        try:
-            _time = ce_time[-1]
-            _ppl = ce_ppl[-1]
-        except:
-            print("ce info error")
-        print("kpis\ttrain_duration_card%s\t%s" %
-                (card_num, _time))
-        print("kpis\ttrain_ppl_card%s\t%f" %
-            (card_num, _ppl))
 def get_cards():
@@ -228,4 +276,4 @@ def get_cards():
 if __name__ == '__main__':
-    train()
+    main()