Unverified commit afaf06e7, authored by wanghuancoder, committed by GitHub

refine resnet benchmark print (#4893)

* fix ptb_dy time print for benchmark, test=develop

* refine resnet benchmark print, test=develop
Parent 4000dfb1
@@ -120,6 +120,7 @@ def validate(args,
     test_batch_time_record = []
     test_batch_metrics_record = []
     test_batch_id = 0
+
     if int(os.environ.get('PADDLE_TRAINERS_NUM', 1)) > 1:
         compiled_program = test_prog
     else:
@@ -245,6 +246,7 @@ def train(args):
                                                  train_fetch_vars[0], exe)
     batch_cost_avg = TimeCostAverage()
+    reader_cost_avg = TimeCostAverage()
     #NOTE: this for benchmark
@@ -267,11 +269,14 @@ def train(args):
             #NOTE: this is for benchmark
             if args.max_iter and total_batch_num == args.max_iter:
                 return
+            t2 = time.time()
+            reader_cost = t2 - t1
+            reader_cost_avg.record(reader_cost)
             train_batch_metrics = exe.run(compiled_train_prog,
                                           feed=batch,
                                           fetch_list=train_fetch_list)
-            t2 = time.time()
-            train_batch_elapse = t2 - t1
+            t3 = time.time()
+            train_batch_elapse = t3 - t1
             train_batch_time_record.append(train_batch_elapse)
             batch_cost_avg.record(train_batch_elapse)
@@ -279,11 +284,18 @@ def train(args):
                 np.array(train_batch_metrics), axis=1)
             train_batch_metrics_record.append(train_batch_metrics_avg)
             if trainer_id == 0:
-                print_info("batch", train_batch_metrics_avg,
-                           batch_cost_avg.get_average(), pass_id,
-                           train_batch_id, args.print_step)
+                print_info(
+                    "batch",
+                    train_batch_metrics_avg,
+                    batch_cost_avg.get_average(),
+                    pass_id,
+                    train_batch_id,
+                    args.print_step,
+                    reader_cost=reader_cost_avg.get_average(),
+                    ips=args.batch_size / batch_cost_avg.get_average())
                 sys.stdout.flush()
                 if train_batch_id % args.print_step == 0:
+                    reader_cost_avg.reset()
                     batch_cost_avg.reset()
             train_batch_id += 1
             t1 = time.time()
...
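Side note on the timing scheme above: t1 is taken at the end of the previous iteration, t2 right after the reader yields a batch, and t3 after exe.run returns, so reader_cost measures the data-loading wait, batch_cost the full iteration, and ips the resulting throughput (batch_size divided by the averaged batch_cost). A minimal, self-contained sketch of that pattern follows; TimeCostAverage here is a simplified stand-in for the repo's helper, and reader, run_one_batch, and batch_size are hypothetical placeholders rather than names from this codebase.

import time

class TimeCostAverage(object):
    # Simplified stand-in: keeps a running sum of recorded costs and
    # returns their mean, until reset() starts a new averaging window.
    def __init__(self):
        self.reset()

    def reset(self):
        self.cnt = 0
        self.total_time = 0.0

    def record(self, usetime):
        self.cnt += 1
        self.total_time += usetime

    def get_average(self):
        return self.total_time / self.cnt if self.cnt else 0.0

def toy_train_loop(reader, run_one_batch, batch_size, print_step=10):
    # Hypothetical loop illustrating the t1/t2/t3 measurement points.
    reader_cost_avg = TimeCostAverage()
    batch_cost_avg = TimeCostAverage()
    t1 = time.time()
    for step, batch in enumerate(reader):
        t2 = time.time()                 # batch is ready
        reader_cost_avg.record(t2 - t1)  # time spent waiting on data
        run_one_batch(batch)             # forward/backward/update
        t3 = time.time()
        batch_cost_avg.record(t3 - t1)   # full iteration time
        if step % print_step == 0:
            batch_cost = batch_cost_avg.get_average()
            print("reader_cost: %.5f sec, batch_cost: %.5f sec, ips: %.5f images/sec"
                  % (reader_cost_avg.get_average(), batch_cost,
                     batch_size / batch_cost))
            reader_cost_avg.reset()
            batch_cost_avg.reset()
        t1 = time.time()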
@@ -420,7 +420,9 @@ def print_info(info_mode,
                batch_id=0,
                print_step=1,
                device_num=1,
-               class_dim=5):
+               class_dim=5,
+               reader_cost=0.0,
+               ips=0.0):
     """print function

     Args:
@@ -439,25 +441,28 @@ def print_info(info_mode,
         if len(metrics) == 2:
             loss, lr = metrics
             logger.info(
-                "[Pass {0}, train batch {1}] \tloss {2}, lr {3}, elapse {4}".
+                "[Pass {0}, train batch {1}] \tloss {2}, lr {3}, reader_cost: {5}, batch_cost: {4}, ips: {6}".
                 format(pass_id, batch_id, "%.5f" % loss, "%.5f" % lr,
-                       "%2.4f sec" % time_info))
+                       "%2.4f sec" % time_info, "%.5f sec" % reader_cost,
+                       "%.5f images/sec" % ips))
         # train and no mixup output
         elif len(metrics) == 4:
             loss, acc1, acc5, lr = metrics
             logger.info(
-                "[Pass {0}, train batch {1}] \tloss {2}, acc1 {3}, acc{7} {4}, lr {5}, elapse {6}".
+                "[Pass {0}, train batch {1}] \tloss {2}, acc1 {3}, acc{7} {4}, lr {5}, reader_cost: {8}, batch_cost: {6}, ips: {9}".
                 format(pass_id, batch_id, "%.5f" % loss, "%.5f" % acc1,
                        "%.5f" % acc5, "%.5f" % lr, "%2.4f sec" % time_info,
-                       min(class_dim, 5)))
+                       min(class_dim, 5), "%.5f sec" % reader_cost,
+                       "%.5f images/sec" % ips))
         # test output
         elif len(metrics) == 3:
             loss, acc1, acc5 = metrics
             logger.info(
-                "[Pass {0}, test batch {1}] \tloss {2}, acc1 {3}, acc{6} {4}, elapse {5}".
+                "[Pass {0}, test batch {1}] \tloss {2}, acc1 {3}, acc{6} {4}, reader_cost: {7}, batch_cost: {5}, ips: {8}".
                 format(pass_id, batch_id, "%.5f" % loss, "%.5f" % acc1,
                        "%.5f" % acc5, "%2.4f sec" % time_info,
-                       min(class_dim, 5)))
+                       min(class_dim, 5), "%.5f sec" % reader_cost,
+                       "%.5f images/sec" % ips))
         else:
             raise Exception(
                 "length of metrics {} is not implemented, It maybe caused by wrong format of build_program_output".
@@ -525,7 +530,8 @@ def best_strategy_compiled(args,
             fluid.require_version(min_version='1.7.0')
             build_strategy.fuse_bn_act_ops = args.fuse_bn_act_ops
         except Exception as e:
-            logger.info("PaddlePaddle version 1.7.0 or higher is "
+            logger.info(
+                "PaddlePaddle version 1.7.0 or higher is "
                 "required when you want to fuse batch_norm and activation_op.")
     build_strategy.fuse_elewise_add_act_ops = args.fuse_elewise_add_act_ops
...
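For illustration only, here is the shape of the per-batch log line produced by the new train-batch format string (the "no mixup" branch with four metrics). Every value below is made up rather than taken from a real run.

# Hypothetical values plugged into the new format string from print_info:
msg = ("[Pass {0}, train batch {1}] \tloss {2}, acc1 {3}, acc{7} {4}, lr {5}, "
       "reader_cost: {8}, batch_cost: {6}, ips: {9}").format(
           0, 100, "%.5f" % 2.34567, "%.5f" % 0.61230, "%.5f" % 0.83420,
           "%.5f" % 0.10000, "%2.4f sec" % 0.15, 5,
           "%.5f sec" % 0.012, "%.5f images/sec" % (256 / 0.15))
print(msg)
# -> [Pass 0, train batch 100]   loss 2.34567, acc1 0.61230, acc5 0.83420, lr 0.10000,
#    reader_cost: 0.01200 sec, batch_cost: 0.1500 sec, ips: 1706.66667 images/sec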