未验证 提交 04ecc0e9 编写于 作者: E eclipsycn 提交者: GitHub

Merge branch 'develop' into develop

...@@ -14,9 +14,11 @@ limitations under the License. */ ...@@ -14,9 +14,11 @@ limitations under the License. */
#include "io.h" #include "io.h"
#include <vector> #include <vector>
#define PADDLE_MOBILE_PROFILE
#ifdef PADDLE_MOBILE_PROFILE #ifdef PADDLE_MOBILE_PROFILE
#include <algorithm>
#include <ctime> #include <ctime>
#include <map> #include <unordered_map>
#endif #endif
#include "common/enforce.h" #include "common/enforce.h"
...@@ -344,31 +346,37 @@ std::shared_ptr<framework::Tensor> Executor<Dtype, P>::Predict( ...@@ -344,31 +346,37 @@ std::shared_ptr<framework::Tensor> Executor<Dtype, P>::Predict(
std::shared_ptr<framework::BlockDesc> to_predict_block = std::shared_ptr<framework::BlockDesc> to_predict_block =
to_predict_program_->Block(0); to_predict_program_->Block(0);
#ifdef PADDLE_MOBILE_PROFILE #ifdef PADDLE_MOBILE_PROFILE
std::map<std::string, clock_t> _profile; std::unordered_map<std::string, clock_t> _profile;
#endif #endif
for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size(); ++j) { for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size(); ++j) {
auto op = ops_of_block_[*to_predict_block.get()][j]; auto op = ops_of_block_[*to_predict_block.get()][j];
#ifdef PADDLE_MOBILE_PROFILE #ifdef PADDLE_MOBILE_PROFILE
_profile[op->Type()] = clock(); _profile[op->Type()] -= clock();
#endif #endif
op->Run(); op->Run();
#ifdef PADDLE_MOBILE_PROFILE #ifdef PADDLE_MOBILE_PROFILE
_profile[op->Type()] = clock() - _profile[op->Type()]; _profile[op->Type()] += clock();
#endif #endif
} }
#ifdef PADDLE_MOBILE_PROFILE #ifdef PADDLE_MOBILE_PROFILE
{ {
DLOG << "========================[ profile ]=========================="; std::cout << "====================[ profile ]======================\n";
using prof_t = std::pair<std::string, clock_t>;
std::vector<prof_t> _tprofile(_profile.begin(), _profile.end());
clock_t _ptotal = 0; clock_t _ptotal = 0;
for (auto const &p : _profile) { for (auto const &p : _tprofile) {
_ptotal += p.second; _ptotal += p.second;
} }
for (auto const &p : _profile) { auto compf = [](const prof_t &a, const prof_t &b) {
DLOG << p.first << std::string(16 - p.first.size(), ' ') << "\t" return a.second > b.second;
<< (float)p.second << "\t\t" };
<< (float)p.second / (float)_ptotal * 100.0; std::sort(_tprofile.begin(), _tprofile.end(), compf);
_tprofile.push_back(std::make_pair("total", _ptotal));
for (auto const &p : _tprofile) {
printf("%-16s\t%-10.0f\t%-.4f\n", p.first.c_str(), (float)p.second,
(float)p.second / _ptotal * 100.0);
} }
DLOG << "========================[ ]=========================="; std::cout << "====================[---------]======================\n";
} }
#endif #endif
auto ops = ops_of_block_[*to_predict_program_->Block(0)]; auto ops = ops_of_block_[*to_predict_program_->Block(0)];
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册