diff --git a/paddle/fluid/framework/async_executor.cc b/paddle/fluid/framework/async_executor.cc index ee3c5e01f87eeb123f43f867296e35cc8adb7e8e..1d9678a1ba1409e5c18d3e25b3aa13dfbbf76908 100644 --- a/paddle/fluid/framework/async_executor.cc +++ b/paddle/fluid/framework/async_executor.cc @@ -304,8 +304,13 @@ void AsyncExecutor::RunFromFile(const ProgramDesc& main_program, // start executing ops in multiple threads for (int thidx = 0; thidx < actual_thread_num; ++thidx) { - threads.push_back( - std::thread(&ExecutorThreadWorker::TrainFiles, workers[thidx].get())); + if (debug) { + threads.push_back(std::thread(&ExecutorThreadWorker::TrainFilesWithTimer, + workers[thidx].get())); + } else { + threads.push_back( + std::thread(&ExecutorThreadWorker::TrainFiles, workers[thidx].get())); + } } for (auto& th : threads) { diff --git a/paddle/fluid/framework/executor_thread_worker.cc b/paddle/fluid/framework/executor_thread_worker.cc index 2eb9e564f87807e88def536ee875ebe0d1e83cd6..1e8f6c6182aab4b981a50a0f42ef2d02bcb9d642 100644 --- a/paddle/fluid/framework/executor_thread_worker.cc +++ b/paddle/fluid/framework/executor_thread_worker.cc @@ -29,6 +29,7 @@ limitations under the License. */ #include "paddle/fluid/inference/io.h" #include "paddle/fluid/platform/cpu_helper.h" #include "paddle/fluid/platform/place.h" +#include "paddle/fluid/platform/timer.h" #include "paddle/fluid/pybind/pybind.h" namespace paddle { namespace framework { @@ -180,6 +181,7 @@ void ExecutorThreadWorker::SetDevice() { return; #else static unsigned concurrency_cap = std::thread::hardware_concurrency(); + LOG(WARNING) << "concurrency capacity " << concurrency_cap; int thread_id = this->thread_id_; if (static_cast(thread_id) < concurrency_cap) { @@ -238,6 +240,49 @@ static void print_fetch_var(Scope* scope, const std::string& var_name) { VLOG(1) << "print_fetch_var: unrecognized data type:" << tensor.type(); } +void ExecutorThreadWorker::TrainFilesWithTimer() { + platform::SetNumThreads(1); + SetDevice(); + thread_reader_->Start(); + std::vector op_total_time; + std::vector op_name; + for (auto& op : ops_) { + op_name.push_back(op->Type()); + } + op_total_time.resize(ops_.size()); + for (size_t i = 0; i < op_total_time.size(); ++i) { + op_total_time[i] = 0.0; + } + platform::Timer timeline; + double total_time = 0.0; + double read_time = 0.0; + int cur_batch; + int batch_cnt = 0; + timeline.Start(); + while ((cur_batch = thread_reader_->Next()) > 0) { + timeline.Pause(); + read_time += timeline.ElapsedSec(); + total_time += timeline.ElapsedSec(); + for (size_t i = 0; i < ops_.size(); ++i) { + timeline.Start(); + ops_[i]->Run(*thread_scope_, place_); + timeline.Pause(); + op_total_time[i] += timeline.ElapsedSec(); + total_time += timeline.ElapsedSec(); + } + ++batch_cnt; + thread_scope_->DropKids(); + if (batch_cnt > 0 && batch_cnt % 1000 == 0) { + for (size_t i = 0; i < ops_.size(); ++i) { + fprintf(stderr, "op_name:[%zu][%s], op_mean_time:[%fs]\n", i, + op_name[i].c_str(), op_total_time[i] / batch_cnt); + } + fprintf(stderr, "mean read time: %fs\n", read_time / batch_cnt); + } + timeline.Start(); + } +} + void ExecutorThreadWorker::TrainFiles() { platform::SetNumThreads(1); diff --git a/paddle/fluid/framework/executor_thread_worker.h b/paddle/fluid/framework/executor_thread_worker.h index 30b81ad88035eacc7a8efbe6d20f03d362122003..524922b0322e538d46f93011fbca3223b02d8849 100644 --- a/paddle/fluid/framework/executor_thread_worker.h +++ b/paddle/fluid/framework/executor_thread_worker.h @@ -155,6 +155,8 @@ class ExecutorThreadWorker { void SetDataFeed(const std::shared_ptr& datafeed); // A multi-thread training function virtual void TrainFiles(); + // with timer log + virtual void TrainFilesWithTimer(); // set fetch variable names from python interface assigned by users void SetFetchVarNames(const std::vector& fetch_var_names); #ifdef PADDLE_WITH_PSLIB diff --git a/paddle/fluid/platform/CMakeLists.txt b/paddle/fluid/platform/CMakeLists.txt index d1dff16ddd859e6bf19ec22420c28819a9f14d50..5197d5d01d8b9800391cfe375c5bff37990f7ae2 100644 --- a/paddle/fluid/platform/CMakeLists.txt +++ b/paddle/fluid/platform/CMakeLists.txt @@ -84,6 +84,8 @@ cc_test(init_test SRCS init_test.cc DEPS device_context) nv_test(cudnn_helper_test SRCS cudnn_helper_test.cc DEPS dynload_cuda) nv_test(transform_test SRCS transform_test.cu DEPS memory place device_context) +cc_library(timer SRCS timer.cc) + cc_library(device_tracer SRCS device_tracer.cc DEPS boost profiler_proto framework_proto ${GPU_CTX_DEPS}) cc_library(profiler SRCS profiler.cc DEPS device_context device_tracer) cc_test(profiler_test SRCS profiler_test.cc DEPS profiler) diff --git a/paddle/fluid/platform/timer.cc b/paddle/fluid/platform/timer.cc new file mode 100644 index 0000000000000000000000000000000000000000..75d4e5cbf90bd81c73756605eacc6b0c15a63e9d --- /dev/null +++ b/paddle/fluid/platform/timer.cc @@ -0,0 +1,63 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/platform/timer.h" + +namespace paddle { +namespace platform { + +void Timer::Reset() { + _start.tv_sec = 0; + _start.tv_usec = 0; + + _count = 0; + _elapsed = 0; + _paused = true; +} + +void Timer::Start() { + Reset(); + Resume(); +} + +void Timer::Pause() { + if (_paused) { + return; + } + _elapsed += Tickus(); + ++_count; + _paused = true; +} + +void Timer::Resume() { + gettimeofday(&_start, NULL); + _paused = false; +} + +int Timer::Count() { return _count; } + +double Timer::ElapsedUS() { return static_cast(_elapsed); } + +double Timer::ElapsedMS() { return _elapsed / 1000.0; } + +double Timer::ElapsedSec() { return _elapsed / 1000000.0; } + +int64_t Timer::Tickus() { + gettimeofday(&_now, NULL); + return (_now.tv_sec - _start.tv_sec) * 1000 * 1000L + + (_now.tv_usec - _start.tv_usec); +} + +} // namespace platform +} // namespace paddle diff --git a/paddle/fluid/platform/timer.h b/paddle/fluid/platform/timer.h index 592d8c8e9da9b1d2972d8c8d196e13a0bd6838b6..35bd83a33dea5125c260345927cb4f721614e74a 100644 --- a/paddle/fluid/platform/timer.h +++ b/paddle/fluid/platform/timer.h @@ -14,74 +14,33 @@ limitations under the License. */ #pragma once #include -#include -#include -#include +#include "paddle/fluid/framework/scope.h" namespace paddle { namespace platform { // A Standard Timer implementation for debugging - class Timer { public: - Timer() { - reset(); - } - - inline void reset() { - _start.tv_sec = 0; - _start.tv_usec = 0; - - _count = 0; - _elapsed = 0; - _paused = true; - } - - inline void start() { - reset(); - resume(); - } - - inline void pause() { - if (_paused) { - return; - } - _elapsed += tickus(); - ++_count; - _paused = true; - } - - inline void resume() { - gettimeofday(&_start, NULL); - _paused = false; - } - - inline int count() const { - return _count; - } - - inline double elapsed_us() const { - return static_cast(_elapsed); - } - inline double elapsed_ms() const { - return _elapsed / 1000.0; - } - inline double elapsed_sec() const { - return _elapsed / 1000000.0; - } + Timer() { Reset(); } + void Reset(); + void Start(); + void Pause(); + void Resume(); + int Count(); + double ElapsedUS(); + double ElapsedMS(); + double ElapsedSec(); private: - struct timeval _start; - struct timeval _now; + struct timeval _start; + struct timeval _now; + int _count; + int _elapsed; + bool _paused; - int32_t _count; - int64_t _elapsed; - bool _paused; - - inline int64_t tickus() { - gettimeofday(&_now, NULL); - return (_now.tv_sec - _start.tv_sec) * 1000 * 1000L + - (_now.tv_usec - _start.tv_usec); - } + int64_t Tickus(); }; + +} // namespace platform +} // namespace paddle