未验证 提交 afaf06e7 编写于 作者: W wanghuancoder 提交者: GitHub

refine resnet benchmark print (#4893)

* fix ptb_dy time print for benchmark, test=develop

* refine resnet benchmark print, test=develop
上级 4000dfb1
......@@ -120,6 +120,7 @@ def validate(args,
test_batch_time_record = []
test_batch_metrics_record = []
test_batch_id = 0
if int(os.environ.get('PADDLE_TRAINERS_NUM', 1)) > 1:
compiled_program = test_prog
else:
......@@ -245,6 +246,7 @@ def train(args):
train_fetch_vars[0], exe)
batch_cost_avg = TimeCostAverage()
reader_cost_avg = TimeCostAverage()
#NOTE: this is for benchmark
......@@ -267,11 +269,14 @@ def train(args):
#NOTE: this is for benchmark
if args.max_iter and total_batch_num == args.max_iter:
return
t2 = time.time()
reader_cost = t2 - t1
reader_cost_avg.record(reader_cost)
train_batch_metrics = exe.run(compiled_train_prog,
feed=batch,
fetch_list=train_fetch_list)
t2 = time.time()
train_batch_elapse = t2 - t1
t3 = time.time()
train_batch_elapse = t3 - t1
train_batch_time_record.append(train_batch_elapse)
batch_cost_avg.record(train_batch_elapse)
......@@ -279,11 +284,18 @@ def train(args):
np.array(train_batch_metrics), axis=1)
train_batch_metrics_record.append(train_batch_metrics_avg)
if trainer_id == 0:
print_info("batch", train_batch_metrics_avg,
batch_cost_avg.get_average(), pass_id,
train_batch_id, args.print_step)
print_info(
"batch",
train_batch_metrics_avg,
batch_cost_avg.get_average(),
pass_id,
train_batch_id,
args.print_step,
reader_cost=reader_cost_avg.get_average(),
ips=args.batch_size / batch_cost_avg.get_average())
sys.stdout.flush()
if train_batch_id % args.print_step == 0:
reader_cost_avg.reset()
batch_cost_avg.reset()
train_batch_id += 1
t1 = time.time()
......
......@@ -420,7 +420,9 @@ def print_info(info_mode,
batch_id=0,
print_step=1,
device_num=1,
class_dim=5):
class_dim=5,
reader_cost=0.0,
ips=0.0):
"""print function
Args:
......@@ -439,25 +441,28 @@ def print_info(info_mode,
if len(metrics) == 2:
loss, lr = metrics
logger.info(
"[Pass {0}, train batch {1}] \tloss {2}, lr {3}, elapse {4}".
"[Pass {0}, train batch {1}] \tloss {2}, lr {3}, reader_cost: {5}, batch_cost: {4}, ips: {6}".
format(pass_id, batch_id, "%.5f" % loss, "%.5f" % lr,
"%2.4f sec" % time_info))
"%2.4f sec" % time_info, "%.5f sec" % reader_cost,
"%.5f images/sec" % ips))
# train and no mixup output
elif len(metrics) == 4:
loss, acc1, acc5, lr = metrics
logger.info(
"[Pass {0}, train batch {1}] \tloss {2}, acc1 {3}, acc{7} {4}, lr {5}, elapse {6}".
"[Pass {0}, train batch {1}] \tloss {2}, acc1 {3}, acc{7} {4}, lr {5}, reader_cost: {8}, batch_cost: {6}, ips: {9}".
format(pass_id, batch_id, "%.5f" % loss, "%.5f" % acc1,
"%.5f" % acc5, "%.5f" % lr, "%2.4f sec" % time_info,
min(class_dim, 5)))
min(class_dim, 5), "%.5f sec" % reader_cost,
"%.5f images/sec" % ips))
# test output
elif len(metrics) == 3:
loss, acc1, acc5 = metrics
logger.info(
"[Pass {0}, test batch {1}] \tloss {2}, acc1 {3}, acc{6} {4}, elapse {5}".
"[Pass {0}, test batch {1}] \tloss {2}, acc1 {3}, acc{6} {4}, reader_cost: {7}, batch_cost: {5}, ips: {8}".
format(pass_id, batch_id, "%.5f" % loss, "%.5f" % acc1,
"%.5f" % acc5, "%2.4f sec" % time_info,
min(class_dim, 5)))
min(class_dim, 5), "%.5f sec" % reader_cost,
"%.5f images/sec" % ips))
else:
raise Exception(
"length of metrics {} is not implemented, It maybe caused by wrong format of build_program_output".
......@@ -525,8 +530,9 @@ def best_strategy_compiled(args,
fluid.require_version(min_version='1.7.0')
build_strategy.fuse_bn_act_ops = args.fuse_bn_act_ops
except Exception as e:
logger.info("PaddlePaddle version 1.7.0 or higher is "
"required when you want to fuse batch_norm and activation_op.")
logger.info(
"PaddlePaddle version 1.7.0 or higher is "
"required when you want to fuse batch_norm and activation_op.")
build_strategy.fuse_elewise_add_act_ops = args.fuse_elewise_add_act_ops
exec_strategy = fluid.ExecutionStrategy()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册