diff --git a/sdk/load-and-run/src/mgblar.cpp b/sdk/load-and-run/src/mgblar.cpp
index a30e21b1817ca7ff7afbfe0081b5c56fb478ec41..15ee3456e5acfd9ed82e02db2915cc80804c5db6 100644
--- a/sdk/load-and-run/src/mgblar.cpp
+++ b/sdk/load-and-run/src/mgblar.cpp
@@ -709,6 +709,59 @@ void run_test_st(Args &env) {
         }
     };
 
+    // Runs the loaded function env.nr_run times, printing the timing of every
+    // iteration followed by a summary line (total, average, standard
+    // deviation, min/max) labelled with case_idx. Returns the summed
+    // per-iteration time in milliseconds as measured by timer.get_msecs().
+    auto run_iters = [&](uint32_t case_idx) -> double {
+        // Nothing to run: skip the summary, whose average would otherwise
+        // divide by zero.
+        if (env.nr_run <= 0) {
+            return 0;
+        }
+        double time_sqrsum = 0, time_sum = 0,
+               min_time = std::numeric_limits<double>::max(), max_time = 0;
+        for (int run = 0; run < env.nr_run; ++run) {
+            mgb_log_debug("load_and_run: before running iter %d", run);
+            timer.reset();
+            func->execute();
+            mgb_log_debug("load_and_run: before waiting iter %d", run);
+            auto exec_time = timer.get_msecs();
+            func->wait();
+            output_dumper.write_to_file();
+            auto cur = timer.get_msecs();
+            printf("iter %d/%d: %.3fms (exec=%.3f,device=%.3f)\n", run,
+                   env.nr_run, cur, exec_time,
+                   func->get_prev_exec_time() * 1e3);
+            time_sum += cur;
+            time_sqrsum += cur * cur;
+            fflush(stdout);
+            if (cur < min_time) {
+                min_time = cur;
+            }
+            if (cur > max_time) {
+                max_time = cur;
+            }
+        }
+        // The sample standard deviation is only defined for two or more
+        // iterations; report 0 otherwise, and also when rounding drives the
+        // variance estimate slightly negative.
+        double time_sd = 0;
+        if (env.nr_run > 1) {
+            const double var =
+                    (time_sqrsum * env.nr_run - time_sum * time_sum) /
+                    (static_cast<double>(env.nr_run) * (env.nr_run - 1));
+            if (var > 0) {
+                time_sd = std::sqrt(var);
+            }
+        }
+        printf("=== finished test #%u: time=%.3fms avg_time=%.3fms "
+               "sd=%.3fms minmax=%.3f,%.3f\n\n",
+               case_idx, time_sum, time_sum / env.nr_run, time_sd, min_time,
+               max_time);
+        return time_sum;
+    };
+
     if (nr_test) {
         // run testcase, generated by dump_with_testcase.py
 
@@ -742,37 +795,7 @@ void run_test_st(Args &env) {
             if (!env.nr_run) {
                 continue;
             }
-            double time_sqrsum = 0, time_sum = 0,
-                   min_time = std::numeric_limits<double>::max(), max_time = 0;
-            for (int run = 0; run < env.nr_run; ++ run) {
-                mgb_log_debug("load_and_run: before running iter %d", run);
-                timer.reset();
-                func->execute();
-                mgb_log_debug("load_and_run: before waiting iter %d", run);
-                auto exec_time = timer.get_msecs();
-                func->wait();
-                output_dumper.write_to_file();
-                auto cur = timer.get_msecs();
-                printf("iter %d/%d: %.3fms (exec=%.3f,device=%.3f)\n", run,
-                        env.nr_run, cur, exec_time,
-                        func->get_prev_exec_time() * 1e3);
-                time_sum += cur;
-                time_sqrsum += cur * cur;
-                fflush(stdout);
-                if (cur < min_time) {
-                    min_time = cur;
-                }
-                if (cur > max_time) {
-                    max_time = cur;
-                }
-            }
-            tot_time += time_sum;
-            printf("=== finished test #%u: time=%.3fms avg_time=%.3fms "
-                    "sd=%.3fms minmax=%.3f,%.3f\n\n",
-                    i, time_sum, time_sum / env.nr_run,
-                    std::sqrt((time_sqrsum * env.nr_run - time_sum * time_sum) /
-                        (env.nr_run * (env.nr_run - 1))),
-                    min_time, max_time);
+            tot_time += run_iters(i);
         }
 
         printf("=== total time: %.3fms\n", tot_time);
 
@@ -793,15 +816,10 @@ void run_test_st(Args &env) {
             in->copy_from(i.second);
         }
 
+        warmup();
         timer.reset();
-        func->execute();
-        auto exec_time = timer.get_msecs();
-        func->wait();
-        output_dumper.write_to_file();
-        auto cur = timer.get_msecs();
-        printf("%.3fms %.3fms (device=%.3f)\n", cur, exec_time,
-                func->get_prev_exec_time() * 1e3);
-
+        printf("=== going to run input for %d times\n", env.nr_run);
+        run_iters(0);
     } else {
         // run speed test for a raw mgb graph
         mgb_assert(env.load_ret.tensor_map.empty(),