提交 2dee8f6c 编写于 作者: D dongdaxiang

add TrainFilesWithTimer in async_executor

上级 cf6188a8
......@@ -304,9 +304,14 @@ void AsyncExecutor::RunFromFile(const ProgramDesc& main_program,
// start executing ops in multiple threads
for (int thidx = 0; thidx < actual_thread_num; ++thidx) {
if (debug) {
threads.push_back(std::thread(&ExecutorThreadWorker::TrainFilesWithTimer,
workers[thidx].get()));
} else {
threads.push_back(
std::thread(&ExecutorThreadWorker::TrainFiles, workers[thidx].get()));
}
}
for (auto& th : threads) {
th.join();
......
......@@ -29,6 +29,7 @@ limitations under the License. */
#include "paddle/fluid/inference/io.h"
#include "paddle/fluid/platform/cpu_helper.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/timer.h"
#include "paddle/fluid/pybind/pybind.h"
namespace paddle {
namespace framework {
......@@ -180,6 +181,7 @@ void ExecutorThreadWorker::SetDevice() {
return;
#else
static unsigned concurrency_cap = std::thread::hardware_concurrency();
LOG(WARNING) << "concurrency capacity " << concurrency_cap;
int thread_id = this->thread_id_;
if (static_cast<unsigned>(thread_id) < concurrency_cap) {
......@@ -238,6 +240,49 @@ static void print_fetch_var(Scope* scope, const std::string& var_name) {
VLOG(1) << "print_fetch_var: unrecognized data type:" << tensor.type();
}
void ExecutorThreadWorker::TrainFilesWithTimer() {
platform::SetNumThreads(1);
SetDevice();
thread_reader_->Start();
std::vector<double> op_total_time;
std::vector<std::string> op_name;
for (auto& op : ops_) {
op_name.push_back(op->Type());
}
op_total_time.resize(ops_.size());
for (size_t i = 0; i < op_total_time.size(); ++i) {
op_total_time[i] = 0.0;
}
platform::Timer timeline;
double total_time = 0.0;
double read_time = 0.0;
int cur_batch;
int batch_cnt = 0;
timeline.Start();
while ((cur_batch = thread_reader_->Next()) > 0) {
timeline.Pause();
read_time += timeline.ElapsedSec();
total_time += timeline.ElapsedSec();
for (size_t i = 0; i < ops_.size(); ++i) {
timeline.Start();
ops_[i]->Run(*thread_scope_, place_);
timeline.Pause();
op_total_time[i] += timeline.ElapsedSec();
total_time += timeline.ElapsedSec();
}
++batch_cnt;
thread_scope_->DropKids();
if (batch_cnt > 0 && batch_cnt % 1000 == 0) {
for (size_t i = 0; i < ops_.size(); ++i) {
fprintf(stderr, "op_name:[%zu][%s], op_mean_time:[%fs]\n", i,
op_name[i].c_str(), op_total_time[i] / batch_cnt);
}
fprintf(stderr, "mean read time: %fs\n", read_time / batch_cnt);
}
timeline.Start();
}
}
void ExecutorThreadWorker::TrainFiles() {
platform::SetNumThreads(1);
......
......@@ -155,6 +155,8 @@ class ExecutorThreadWorker {
void SetDataFeed(const std::shared_ptr<DataFeed>& datafeed);
// A multi-thread training function
virtual void TrainFiles();
// with timer log
virtual void TrainFilesWithTimer();
// set fetch variable names from python interface assigned by users
void SetFetchVarNames(const std::vector<std::string>& fetch_var_names);
#ifdef PADDLE_WITH_PSLIB
......
......@@ -84,6 +84,8 @@ cc_test(init_test SRCS init_test.cc DEPS device_context)
nv_test(cudnn_helper_test SRCS cudnn_helper_test.cc DEPS dynload_cuda)
nv_test(transform_test SRCS transform_test.cu DEPS memory place device_context)
cc_library(timer SRCS timer.cc)
cc_library(device_tracer SRCS device_tracer.cc DEPS boost profiler_proto framework_proto ${GPU_CTX_DEPS})
cc_library(profiler SRCS profiler.cc DEPS device_context device_tracer)
cc_test(profiler_test SRCS profiler_test.cc DEPS profiler)
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/platform/timer.h"
namespace paddle {
namespace platform {
void Timer::Reset() {
_start.tv_sec = 0;
_start.tv_usec = 0;
_count = 0;
_elapsed = 0;
_paused = true;
}
void Timer::Start() {
Reset();
Resume();
}
void Timer::Pause() {
if (_paused) {
return;
}
_elapsed += Tickus();
++_count;
_paused = true;
}
void Timer::Resume() {
gettimeofday(&_start, NULL);
_paused = false;
}
int Timer::Count() { return _count; }
double Timer::ElapsedUS() { return static_cast<double>(_elapsed); }
double Timer::ElapsedMS() { return _elapsed / 1000.0; }
double Timer::ElapsedSec() { return _elapsed / 1000000.0; }
int64_t Timer::Tickus() {
gettimeofday(&_now, NULL);
return (_now.tv_sec - _start.tv_sec) * 1000 * 1000L +
(_now.tv_usec - _start.tv_usec);
}
} // namespace platform
} // namespace paddle
......@@ -14,74 +14,33 @@ limitations under the License. */
#pragma once
#include <sys/time.h>
#include <pmmintrin.h>
#include <string>
#include <vector>
#include "paddle/fluid/framework/scope.h"
namespace paddle {
namespace platform {
// A Standard Timer implementation for debugging
class Timer {
public:
Timer() {
reset();
}
inline void reset() {
_start.tv_sec = 0;
_start.tv_usec = 0;
_count = 0;
_elapsed = 0;
_paused = true;
}
inline void start() {
reset();
resume();
}
inline void pause() {
if (_paused) {
return;
}
_elapsed += tickus();
++_count;
_paused = true;
}
inline void resume() {
gettimeofday(&_start, NULL);
_paused = false;
}
inline int count() const {
return _count;
}
inline double elapsed_us() const {
return static_cast<double>(_elapsed);
}
inline double elapsed_ms() const {
return _elapsed / 1000.0;
}
inline double elapsed_sec() const {
return _elapsed / 1000000.0;
}
Timer() { Reset(); }
void Reset();
void Start();
void Pause();
void Resume();
int Count();
double ElapsedUS();
double ElapsedMS();
double ElapsedSec();
private:
struct timeval _start;
struct timeval _now;
int32_t _count;
int64_t _elapsed;
int _count;
int _elapsed;
bool _paused;
inline int64_t tickus() {
gettimeofday(&_now, NULL);
return (_now.tv_sec - _start.tv_sec) * 1000 * 1000L +
(_now.tv_usec - _start.tv_usec);
}
int64_t Tickus();
};
} // namespace platform
} // namespace paddle
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册