提交 5b87e8a8 编写于 作者: M Megvii Engine Team 提交者: Xu Xinran

fix(load_and_run): fix load_and_run with --input, which ignored iters

GitOrigin-RevId: 870d90900d31c1d795b1adedb63077a4a5c8a974
上级 80af2f93
......@@ -709,6 +709,41 @@ void run_test_st(Args &env) {
}
};
// Executes the loaded function env.nr_run times and reports timing.
//
// For every iteration this prints the wall time of the whole iteration
// (execute + wait + output dump) together with the time spent in execute()
// alone and the value reported by func->get_prev_exec_time(). After the last
// iteration a summary line with total, average, sample standard deviation and
// min/max is printed.
//
// \param case_idx index of the test case, only used in the summary line
// \return accumulated wall time of all iterations, as measured by
//         timer.get_msecs(); 0 when env.nr_run is not positive
auto run_iters = [&](uint32_t case_idx) -> float {
    // With no iterations there is nothing to measure; bail out instead of
    // printing an average of 0/0 and a "min" of DBL_MAX.
    if (env.nr_run <= 0) {
        return 0;
    }

    // Do the statistics in double so that nr_run * (nr_run - 1) cannot
    // overflow int for large iteration counts.
    const double nr_run = static_cast<double>(env.nr_run);

    double time_sqrsum = 0, time_sum = 0,
           min_time = std::numeric_limits<double>::max(), max_time = 0;
    for (int run = 0; run < env.nr_run; ++run) {
        mgb_log_debug("load_and_run: before running iter %d", run);
        timer.reset();
        func->execute();
        mgb_log_debug("load_and_run: before waiting iter %d", run);
        // time elapsed until execute() returned, i.e. before wait()
        auto exec_time = timer.get_msecs();
        func->wait();
        output_dumper.write_to_file();
        // total time of this iteration, including wait() and output dumping
        auto cur = timer.get_msecs();
        printf("iter %d/%d: %.3fms (exec=%.3f,device=%.3f)\n", run,
               env.nr_run, cur, exec_time,
               func->get_prev_exec_time() * 1e3);
        time_sum += cur;
        time_sqrsum += cur * cur;
        fflush(stdout);
        if (cur < min_time) {
            min_time = cur;
        }
        if (cur > max_time) {
            max_time = cur;
        }
    }

    // Sample standard deviation: sqrt((n * sum(x^2) - sum(x)^2) / (n * (n - 1))).
    // It is undefined for a single sample (division by zero), so report 0 in
    // that case. The numerator may also become slightly negative because of
    // floating point cancellation; clamp it so that sqrt() never yields NaN.
    double time_sd = 0;
    if (env.nr_run > 1) {
        double var_num = time_sqrsum * nr_run - time_sum * time_sum;
        if (var_num < 0) {
            var_num = 0;
        }
        time_sd = std::sqrt(var_num / (nr_run * (nr_run - 1)));
    }

    printf("=== finished test #%u: time=%.3fms avg_time=%.3fms "
           "sd=%.3fms minmax=%.3f,%.3f\n\n",
           case_idx, time_sum, time_sum / nr_run, time_sd, min_time,
           max_time);
    return time_sum;
};
if (nr_test) {
// run testcase, generated by dump_with_testcase.py
......@@ -742,37 +777,7 @@ void run_test_st(Args &env) {
if (!env.nr_run) {
continue;
}
double time_sqrsum = 0, time_sum = 0,
min_time = std::numeric_limits<double>::max(), max_time = 0;
for (int run = 0; run < env.nr_run; ++ run) {
mgb_log_debug("load_and_run: before running iter %d", run);
timer.reset();
func->execute();
mgb_log_debug("load_and_run: before waiting iter %d", run);
auto exec_time = timer.get_msecs();
func->wait();
output_dumper.write_to_file();
auto cur = timer.get_msecs();
printf("iter %d/%d: %.3fms (exec=%.3f,device=%.3f)\n", run,
env.nr_run, cur, exec_time,
func->get_prev_exec_time() * 1e3);
time_sum += cur;
time_sqrsum += cur * cur;
fflush(stdout);
if (cur < min_time) {
min_time = cur;
}
if (cur > max_time) {
max_time = cur;
}
}
tot_time += time_sum;
printf("=== finished test #%u: time=%.3fms avg_time=%.3fms "
"sd=%.3fms minmax=%.3f,%.3f\n\n",
i, time_sum, time_sum / env.nr_run,
std::sqrt((time_sqrsum * env.nr_run - time_sum * time_sum) /
(env.nr_run * (env.nr_run - 1))),
min_time, max_time);
tot_time += run_iters(i);
}
printf("=== total time: %.3fms\n", tot_time);
......@@ -793,15 +798,10 @@ void run_test_st(Args &env) {
in->copy_from(i.second);
}
warmup();
timer.reset();
func->execute();
auto exec_time = timer.get_msecs();
func->wait();
output_dumper.write_to_file();
auto cur = timer.get_msecs();
printf("%.3fms %.3fms (device=%.3f)\n", cur, exec_time,
func->get_prev_exec_time() * 1e3);
printf("=== going to run input for %d times\n", env.nr_run);
run_iters(0);
} else {
// run speed test for a raw mgb graph
mgb_assert(env.load_ret.tensor_map.empty(),
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册