diff --git a/paddle/fluid/framework/async_executor.cc b/paddle/fluid/framework/async_executor.cc index ee3c5e01f87eeb123f43f867296e35cc8adb7e8e..1d9678a1ba1409e5c18d3e25b3aa13dfbbf76908 100644 --- a/paddle/fluid/framework/async_executor.cc +++ b/paddle/fluid/framework/async_executor.cc @@ -304,8 +304,13 @@ void AsyncExecutor::RunFromFile(const ProgramDesc& main_program, // start executing ops in multiple threads for (int thidx = 0; thidx < actual_thread_num; ++thidx) { - threads.push_back( - std::thread(&ExecutorThreadWorker::TrainFiles, workers[thidx].get())); + if (debug) { + threads.push_back(std::thread(&ExecutorThreadWorker::TrainFilesWithTimer, + workers[thidx].get())); + } else { + threads.push_back( + std::thread(&ExecutorThreadWorker::TrainFiles, workers[thidx].get())); + } } for (auto& th : threads) { diff --git a/paddle/fluid/framework/executor_thread_worker.cc b/paddle/fluid/framework/executor_thread_worker.cc index 2eb9e564f87807e88def536ee875ebe0d1e83cd6..c26e6bf4790b8dd3b4ba5c8470ad1cdfae2acfa9 100644 --- a/paddle/fluid/framework/executor_thread_worker.cc +++ b/paddle/fluid/framework/executor_thread_worker.cc @@ -180,6 +180,7 @@ void ExecutorThreadWorker::SetDevice() { return; #else static unsigned concurrency_cap = std::thread::hardware_concurrency(); + LOG(WARNING) << "concurrency capacity " << concurrency_cap; int thread_id = this->thread_id_; if (static_cast(thread_id) < concurrency_cap) { @@ -238,6 +239,49 @@ static void print_fetch_var(Scope* scope, const std::string& var_name) { VLOG(1) << "print_fetch_var: unrecognized data type:" << tensor.type(); } +void ExecutorThreadWorker::TrainFilesWithTimer() { + platform::SetNumThreads(1); + SetDevice(); + thread_reader_->Start(); + std::vector op_total_time; + std::vector op_name; + for (auto& op : ops_) { + op_name.push_back(op->Type()); + } + op_total_time.resize(ops_.size()); + for (int i = 0; i < op_total_time.size(); ++i) { + op_total_time[i] = 0.0; + } + platform::Timer timeline; + double total_time = 0.0; + double read_time = 0.0; + int cur_batch; + int batch_cnt = 0; + timeline.Start(); + while ((cur_batch = thread_reader_->Next()) > 0) { + timeline.Pause(); + read_time += timeline.ElapsedSec(); + total_time += timeline.ElapsedSec(); + for (int i = 0; i < ops_.size(); ++i) { + timeline.Start(); + ops_[i]->Run(*thread_scope_, place_); + timeline.Pause(); + op_total_time[i] += timeline.ElapsedSec(); + total_time += timeline.ElapsedSec(); + } + ++batch_cnt; + thread_scope_->DropKids(); + if (batch_cnt > 0 && batch_cnt % 1000 == 0) { + for (int i = 0; i < ops_.size(); ++i) { + fprintf(stderr, "op_name:[%d][%s], op_mean_time:[%fs]\n", i, + op_name[i].c_str(), op_total_time[i] / batch_cnt); + } + fprintf(stderr, "mean read time: %fs\n", read_time / batch_cnt); + } + timeline.Start(); + } +} + void ExecutorThreadWorker::TrainFiles() { platform::SetNumThreads(1); diff --git a/paddle/fluid/framework/executor_thread_worker.h b/paddle/fluid/framework/executor_thread_worker.h index 30b81ad88035eacc7a8efbe6d20f03d362122003..524922b0322e538d46f93011fbca3223b02d8849 100644 --- a/paddle/fluid/framework/executor_thread_worker.h +++ b/paddle/fluid/framework/executor_thread_worker.h @@ -155,6 +155,8 @@ class ExecutorThreadWorker { void SetDataFeed(const std::shared_ptr& datafeed); // A multi-thread training function virtual void TrainFiles(); + // with timer log + virtual void TrainFilesWithTimer(); // set fetch variable names from python interface assigned by users void SetFetchVarNames(const std::vector& fetch_var_names); #ifdef PADDLE_WITH_PSLIB