diff --git a/src/io/io.cpp b/src/io/io.cpp
index 9c34378d99e52e8e2919944a9319e8cc97d6b074..b1906fb333985275e85a4f502535f57e77744f07 100644
--- a/src/io/io.cpp
+++ b/src/io/io.cpp
@@ -14,6 +14,10 @@ limitations under the License. */
 
 #include "io.h"
 #include
+#ifdef PADDLE_MOBILE_PROFILE
+#include <ctime>
+#include <map>
+#endif
 
 #include "common/enforce.h"
 #include "common/log.h"
@@ -336,10 +340,47 @@ std::shared_ptr Executor::Predict(
   feed_tensor->ShareDataWith(t);
   std::shared_ptr to_predict_block =
       to_predict_program_->Block(0);
+#ifdef PADDLE_MOBILE_PROFILE
+  // Clock ticks accumulated per operator type across the whole block.
+  std::map<std::string, clock_t> _profile;
+#endif
   for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size(); ++j) {
     auto op = ops_of_block_[*to_predict_block.get()][j];
+#ifdef PADDLE_MOBILE_PROFILE
+    const clock_t _pstart = clock();
+#endif
     op->Run();
+#ifdef PADDLE_MOBILE_PROFILE
+    // Accumulate instead of assigning: several ops may share one type, and
+    // assignment would keep only the last op's time for that type.
+    _profile[op->Type()] += clock() - _pstart;
+#endif
   }
+#ifdef PADDLE_MOBILE_PROFILE
+  {
+    // Log, for each operator type, its accumulated clock ticks and its
+    // percentage of the total over all types.
+    DLOG << "========================[ profile ]==========================";
+    clock_t _ptotal = 0;
+    for (auto const &p : _profile) {
+      _ptotal += p.second;
+    }
+    for (auto const &p : _profile) {
+      // Pad names to 16 columns. The subtraction is unsigned, so names
+      // longer than 16 characters must be guarded or the count wraps around.
+      const auto pad = p.first.size() < 16 ? 16 - p.first.size() : 0;
+      // Report 0% rather than dividing by zero when no ticks were measured.
+      const double percent =
+          _ptotal != 0 ? static_cast<float>(p.second) /
+                             static_cast<float>(_ptotal) * 100.0
+                       : 0.0;
+      DLOG << p.first << std::string(pad, ' ')
+           << "\t" << static_cast<float>(p.second)
+           << "\t\t" << percent;
+    }
+    DLOG << "========================[ ]==========================";
+  }
+#endif
   auto ops = ops_of_block_[*to_predict_program_->Block(0)];
   auto last_op = ops.rbegin();
   auto output_map = (*last_op)->Outputs();