From 1d63dafdeb9bf9f0817d761bd090f5a9a517946f Mon Sep 17 00:00:00 2001
From: gongweibao
Date: Mon, 15 Oct 2018 03:07:31 +0000
Subject: [PATCH] fix

---
 .../transformer/config.py |  1 +
 .../transformer/train.py  | 23 +++++++++++++++----
 2 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/fluid/neural_machine_translation/transformer/config.py b/fluid/neural_machine_translation/transformer/config.py
index ca119aa6..825ac0f3 100644
--- a/fluid/neural_machine_translation/transformer/config.py
+++ b/fluid/neural_machine_translation/transformer/config.py
@@ -32,6 +32,7 @@ class TrainTaskConfig(object):
     start_step = 0
     # the frequency to save trained models.
     save_freq = 10000
+    profile = True
 
 
 class InferTaskConfig(object):
diff --git a/fluid/neural_machine_translation/transformer/train.py b/fluid/neural_machine_translation/transformer/train.py
index c4b6d6d9..ba8f4c24 100644
--- a/fluid/neural_machine_translation/transformer/train.py
+++ b/fluid/neural_machine_translation/transformer/train.py
@@ -7,6 +7,7 @@ import time
 
 import numpy as np
 import paddle.fluid as fluid
+import paddle.fluid.profiler as profiler
 
 import reader
 from config import *
@@ -130,6 +131,9 @@ def parse_args():
         default=100,
         help="Fetch outputs steps.")
 
+    # parser.add_argument(
+    #     '--profile', action='store_true', help='If set, profile a few steps.')
+
     args = parser.parse_args()
 
     # Append args related to dict
@@ -467,8 +471,8 @@ def train_loop(exe, train_prog, startup_prog, dev_count, sum_cost, avg_cost,
     #build_strategy.gradient_scale_strategy = fluid.BuildStrategy.GradientScaleStrategy.Customized
 
     exec_strategy = fluid.ExecutionStrategy()
-    if args.update_method == "nccl2":
-        exec_strategy.num_threads = 1
+    # if args.update_method == "nccl2":
+    exec_strategy.num_threads = 1
 
     logging.info("begin executor")
     train_exe = fluid.ParallelExecutor(
@@ -509,11 +513,22 @@ def train_loop(exe, train_prog, startup_prog, dev_count, sum_cost, avg_cost,
             feed_dict_list = prepare_feed_dict_list(data_generator,
                                                     init_flag, dev_count)
 
+            if TrainTaskConfig.profile and batch_id == 5:
+                logging.info("begin profiler")
+                profiler.start_profiler("All")
+                profiler.reset_profiler()
+            elif TrainTaskConfig.profile and batch_id == 10:
+                logging.info("end profiler")
+                # logging.info("profiling total time: ", time.time() - start_time)
+                profiler.stop_profiler("total", "./transformer_local_profile_{}_pass{}".format(batch_id, pass_id))
+                sys.exit(0)
+
+            logging.info("batch_id:{}".format(batch_id))
             outs = train_exe.run(
-                fetch_list=[sum_cost.name, token_num.name] if batch_id % args.fetch_steps == 0 else [],
+                fetch_list=[sum_cost.name, token_num.name] if (batch_id % args.fetch_steps == 0 or TrainTaskConfig.profile) else [],
                 feed=feed_dict_list)
 
-            if batch_id % args.fetch_steps == 0 and batch_id > 0:
+            if (batch_id % args.fetch_steps == 0 and batch_id > 0):
                 sum_cost_val, token_num_val = np.array(outs[0]), np.array(outs[
                     1])  # sum the cost from multi-devices
 
--
GitLab
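
Note on the profiling pattern: the last train.py hunk brackets batches 5-10 of
a pass with PaddlePaddle's fluid profiler and exits once the report is written.
The sketch below isolates that pattern outside the Transformer code. It is a
minimal sketch, not the patch itself: run_batch, the loop bound of 100, and the
output path "./transformer_profile" are hypothetical stand-ins for
train_exe.run, the reader loop, and the patch's formatted path. It also assumes
train.py already imports sys, since the patch calls sys.exit(0) without adding
an import for it. Only the profiler calls mirror the patch.

    import sys

    import paddle.fluid.profiler as profiler

    PROFILE_START = 5   # skip warm-up batches (one-off allocations, compilation)
    PROFILE_STOP = 10   # profile a short steady-state window, then exit


    def run_batch(batch_id):
        """Hypothetical stand-in for one train_exe.run training step."""
        pass


    for batch_id in range(100):
        if batch_id == PROFILE_START:
            profiler.start_profiler("All")  # record both CPU and GPU events
            profiler.reset_profiler()       # drop anything recorded before the window
        elif batch_id == PROFILE_STOP:
            # Sort ops by total time and write the report to the given path.
            profiler.stop_profiler("total", "./transformer_profile")
            sys.exit(0)  # report is on disk; no need to keep training
        run_batch(batch_id)

Starting at batch 5 rather than batch 0 keeps one-time startup costs out of
the averaged op timings, and the fetch_list change in the patch appears
intended to make every profiled step fetch outputs, so each profiled iteration
is fully synchronized before the next begins.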