diff --git a/lite/kernels/mlu/bridges/graph.h b/lite/kernels/mlu/bridges/graph.h index bc4da057bfd39e120a48aa7b0525710c2282358b..a8044ac58243cd94935e4c9d5fd4d330a4a81051 100644 --- a/lite/kernels/mlu/bridges/graph.h +++ b/lite/kernels/mlu/bridges/graph.h @@ -23,6 +23,12 @@ #include "lite/core/tensor.h" #include "lite/kernels/mlu/bridges/tensor.h" +#define PRINT_HW_TIME false + +#if PRINT_HW_TIME +#include +#endif + namespace paddle { namespace lite { namespace subgraph { @@ -32,7 +38,13 @@ namespace mlu { // to the MLU IR graph class Graph { public: - Graph() { CNML_CALL(cnmlCreateFusionOp(&fusion_op_)); } + Graph() { + CNML_CALL(cnmlCreateFusionOp(&fusion_op_)); +#if PRINT_HW_TIME + CNRT_CALL(cnrtCreateNotifier(¬ifier_start_)); + CNRT_CALL(cnrtCreateNotifier(¬ifier_end_)); +#endif + } ~Graph() { FreeConstData(); @@ -40,6 +52,16 @@ class Graph { for (auto op : ops_) { CNML_CALL(cnmlDestroyBaseOp(&op)); } +#if PRINT_HW_TIME + CNRT_CALL(cnrtDestroyNotifier(¬ifier_start_)); + CNRT_CALL(cnrtDestroyNotifier(¬ifier_end_)); + double total_time = 0; + for (auto& f : time_log_) { + total_time += f; + } + std::cout << "cnml hardware time for " << time_log_.size() + << " process:" << total_time / time_log_.size() << std::endl; +#endif } // Data node @@ -90,6 +112,10 @@ class Graph { } void Compute(cnrtInvokeFuncParam_t forward_param, cnrtQueue_t que) { +#if PRINT_HW_TIME + thread_local float hw_time; + CNRT_CALL(cnrtPlaceNotifier(notifier_start_, que)); +#endif CNML_CALL(cnmlComputeFusionOpForward_V3(fusion_op_, input_addrs_.data(), input_addrs_.size(), @@ -97,7 +123,18 @@ class Graph { output_addrs_.size(), &forward_param, que)); +#if PRINT_HW_TIME + CNRT_CALL(cnrtPlaceNotifier(notifier_end_, que)); +#endif + CNRT_CALL(cnrtSyncQueue(que)); +#if PRINT_HW_TIME + CNRT_CALL(cnrtNotifierDuration(notifier_start_, notifier_end_, &hw_time)); + hw_time /= 1000.0f; + DLOG(INFO) << "cnml hardware time " << hw_time << "ms" << std::endl; + std::lock_guard lk(time_mut_); + time_log_.push_back(hw_time); +#endif } template @@ -203,6 +240,11 @@ class Graph { std::vector ops_; cnmlFusionOp_t fusion_op_; std::vector const_data_storage_; +#if PRINT_HW_TIME + cnrtNotifier_t notifier_start_{}, notifier_end_{}; + std::mutex time_mut_; + std::vector time_log_; +#endif }; } // namespace mlu