diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index 7a37fdb7c55aacb5f5ed336149cae335e58154f8..2ef90bf481bf6a9b58a1dd2da8965782d68722df 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -189,9 +189,9 @@ cc_library(parallel_executor SRCS parallel_executor.cc DEPS fast_threaded_ssa_graph_executor variable_helper) if(WITH_PSLIB) - cc_library(async_executor SRCS async_executor.cc data_feed.cc data_feed_factory.cc executor_thread_worker.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass async_executor_proto variable_helper pslib_brpc pslib) + cc_library(async_executor SRCS async_executor.cc data_feed.cc data_feed_factory.cc executor_thread_worker.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass async_executor_proto variable_helper pslib_brpc pslib timer) else() - cc_library(async_executor SRCS async_executor.cc data_feed.cc data_feed_factory.cc executor_thread_worker.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass async_executor_proto variable_helper) + cc_library(async_executor SRCS async_executor.cc data_feed.cc data_feed_factory.cc executor_thread_worker.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass async_executor_proto variable_helper timer) endif(WITH_PSLIB) diff --git a/paddle/fluid/framework/async_executor.cc b/paddle/fluid/framework/async_executor.cc index ee3c5e01f87eeb123f43f867296e35cc8adb7e8e..1d9678a1ba1409e5c18d3e25b3aa13dfbbf76908 100644 --- a/paddle/fluid/framework/async_executor.cc +++ b/paddle/fluid/framework/async_executor.cc @@ -304,8 +304,13 @@ void AsyncExecutor::RunFromFile(const ProgramDesc& main_program, // start executing ops in multiple threads for (int thidx = 0; thidx < actual_thread_num; ++thidx) { - threads.push_back( - std::thread(&ExecutorThreadWorker::TrainFiles, workers[thidx].get())); + if (debug) { + threads.push_back(std::thread(&ExecutorThreadWorker::TrainFilesWithTimer, + workers[thidx].get())); + } else { + threads.push_back( + std::thread(&ExecutorThreadWorker::TrainFiles, workers[thidx].get())); + } } for (auto& th : threads) { diff --git a/paddle/fluid/framework/executor_thread_worker.cc b/paddle/fluid/framework/executor_thread_worker.cc index 2eb9e564f87807e88def536ee875ebe0d1e83cd6..4972bc7ec3a90f8cebea19bcaf320813f7e50e39 100644 --- a/paddle/fluid/framework/executor_thread_worker.cc +++ b/paddle/fluid/framework/executor_thread_worker.cc @@ -29,6 +29,7 @@ limitations under the License. */ #include "paddle/fluid/inference/io.h" #include "paddle/fluid/platform/cpu_helper.h" #include "paddle/fluid/platform/place.h" +#include "paddle/fluid/platform/timer.h" #include "paddle/fluid/pybind/pybind.h" namespace paddle { namespace framework { @@ -180,6 +181,7 @@ void ExecutorThreadWorker::SetDevice() { return; #else static unsigned concurrency_cap = std::thread::hardware_concurrency(); + LOG(WARNING) << "concurrency capacity " << concurrency_cap; int thread_id = this->thread_id_; if (static_cast(thread_id) < concurrency_cap) { @@ -238,6 +240,55 @@ static void print_fetch_var(Scope* scope, const std::string& var_name) { VLOG(1) << "print_fetch_var: unrecognized data type:" << tensor.type(); } +void ExecutorThreadWorker::TrainFilesWithTimer() { + platform::SetNumThreads(1); + SetDevice(); + thread_reader_->Start(); + std::vector op_total_time; + std::vector op_name; + for (auto& op : ops_) { + op_name.push_back(op->Type()); + } + op_total_time.resize(ops_.size()); + for (size_t i = 0; i < op_total_time.size(); ++i) { + op_total_time[i] = 0.0; + } + platform::Timer timeline; + double total_time = 0.0; + double read_time = 0.0; + int cur_batch; + int batch_cnt = 0; + timeline.Start(); + while ((cur_batch = thread_reader_->Next()) > 0) { + timeline.Pause(); + read_time += timeline.ElapsedSec(); + total_time += timeline.ElapsedSec(); + for (size_t i = 0; i < ops_.size(); ++i) { + timeline.Start(); + ops_[i]->Run(*thread_scope_, place_); + timeline.Pause(); + op_total_time[i] += timeline.ElapsedSec(); + total_time += timeline.ElapsedSec(); + } + ++batch_cnt; + thread_scope_->DropKids(); + if (thread_id_ == 0) { + if (batch_cnt > 0 && batch_cnt % 1000 == 0) { + for (size_t i = 0; i < ops_.size(); ++i) { + fprintf(stderr, "op_name:[%zu][%s], op_mean_time:[%fs]\n", i, + op_name[i].c_str(), op_total_time[i] / batch_cnt); + } + fprintf(stderr, "mean read time: %fs\n", read_time / batch_cnt); + int fetch_var_num = fetch_var_names_.size(); + for (int i = 0; i < fetch_var_num; ++i) { + print_fetch_var(thread_scope_, fetch_var_names_[i]); + } + } + } + timeline.Start(); + } +} + void ExecutorThreadWorker::TrainFiles() { platform::SetNumThreads(1); @@ -320,10 +371,12 @@ void AsyncExecutorThreadWorker::SetPSlibPtr( std::shared_ptr pslib_ptr) { _pslib_ptr = pslib_ptr; } + void AsyncExecutorThreadWorker::SetPullDenseThread( std::shared_ptr dpt) { _pull_dense_thread = dpt; } + void AsyncExecutorThreadWorker::TrainOneNetwork() { PrepareParams(); diff --git a/paddle/fluid/framework/executor_thread_worker.h b/paddle/fluid/framework/executor_thread_worker.h index 30b81ad88035eacc7a8efbe6d20f03d362122003..524922b0322e538d46f93011fbca3223b02d8849 100644 --- a/paddle/fluid/framework/executor_thread_worker.h +++ b/paddle/fluid/framework/executor_thread_worker.h @@ -155,6 +155,8 @@ class ExecutorThreadWorker { void SetDataFeed(const std::shared_ptr& datafeed); // A multi-thread training function virtual void TrainFiles(); + // with timer log + virtual void TrainFilesWithTimer(); // set fetch variable names from python interface assigned by users void SetFetchVarNames(const std::vector& fetch_var_names); #ifdef PADDLE_WITH_PSLIB diff --git a/paddle/fluid/platform/CMakeLists.txt b/paddle/fluid/platform/CMakeLists.txt index d1dff16ddd859e6bf19ec22420c28819a9f14d50..05a0f14440732e5aef2ff665fbd3a5c1c7094581 100644 --- a/paddle/fluid/platform/CMakeLists.txt +++ b/paddle/fluid/platform/CMakeLists.txt @@ -84,6 +84,9 @@ cc_test(init_test SRCS init_test.cc DEPS device_context) nv_test(cudnn_helper_test SRCS cudnn_helper_test.cc DEPS dynload_cuda) nv_test(transform_test SRCS transform_test.cu DEPS memory place device_context) +cc_library(timer SRCS timer.cc) +cc_test(timer_test SRCS timer_test.cc DEPS timer) + cc_library(device_tracer SRCS device_tracer.cc DEPS boost profiler_proto framework_proto ${GPU_CTX_DEPS}) cc_library(profiler SRCS profiler.cc DEPS device_context device_tracer) cc_test(profiler_test SRCS profiler_test.cc DEPS profiler) diff --git a/paddle/fluid/platform/timer.cc b/paddle/fluid/platform/timer.cc new file mode 100644 index 0000000000000000000000000000000000000000..75d4e5cbf90bd81c73756605eacc6b0c15a63e9d --- /dev/null +++ b/paddle/fluid/platform/timer.cc @@ -0,0 +1,63 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/platform/timer.h" + +namespace paddle { +namespace platform { + +void Timer::Reset() { + _start.tv_sec = 0; + _start.tv_usec = 0; + + _count = 0; + _elapsed = 0; + _paused = true; +} + +void Timer::Start() { + Reset(); + Resume(); +} + +void Timer::Pause() { + if (_paused) { + return; + } + _elapsed += Tickus(); + ++_count; + _paused = true; +} + +void Timer::Resume() { + gettimeofday(&_start, NULL); + _paused = false; +} + +int Timer::Count() { return _count; } + +double Timer::ElapsedUS() { return static_cast(_elapsed); } + +double Timer::ElapsedMS() { return _elapsed / 1000.0; } + +double Timer::ElapsedSec() { return _elapsed / 1000000.0; } + +int64_t Timer::Tickus() { + gettimeofday(&_now, NULL); + return (_now.tv_sec - _start.tv_sec) * 1000 * 1000L + + (_now.tv_usec - _start.tv_usec); +} + +} // namespace platform +} // namespace paddle diff --git a/paddle/fluid/platform/timer.h b/paddle/fluid/platform/timer.h new file mode 100644 index 0000000000000000000000000000000000000000..9bb66eb97ad92e526147a5a4c009880783ac3c94 --- /dev/null +++ b/paddle/fluid/platform/timer.h @@ -0,0 +1,54 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include +#include "paddle/fluid/platform/port.h" + +namespace paddle { +namespace platform { + +// A Standard Timer implementation for debugging +class Timer { + public: + // a timer class for profiling + // Reset() will be called during initialization + // all timing variables will be set 0 in Reset() + Timer() { Reset(); } + void Reset(); + void Start(); + void Pause(); + // Resume will get current system time + void Resume(); + int Count(); + // return elapsed time in us + double ElapsedUS(); + // return elapsed time in ms + double ElapsedMS(); + // return elapsed time in sec + double ElapsedSec(); + + private: + struct timeval _start; + struct timeval _now; + int _count; + int _elapsed; + bool _paused; + + // get us difference between start and now + int64_t Tickus(); +}; + +} // namespace platform +} // namespace paddle diff --git a/paddle/fluid/platform/timer_test.cc b/paddle/fluid/platform/timer_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..09edf8131ffa5c1dfe607b7d72627b225c4452fa --- /dev/null +++ b/paddle/fluid/platform/timer_test.cc @@ -0,0 +1,45 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "paddle/fluid/platform/timer.h" +#include "gtest/gtest.h" + +TEST(Timer, Reset) { + paddle::platform::Timer timeline; + timeline.Start(); + sleep(3); + timeline.Pause(); + timeline.Reset(); +} + +TEST(Timer, Start) { + paddle::platform::Timer timeline; + timeline.Start(); + sleep(3); + timeline.Pause(); +} + +TEST(Timer, Pause) { + paddle::platform::Timer timeline; + timeline.Start(); + sleep(3); + timeline.Pause(); +} + +TEST(Timer, Resume) { + paddle::platform::Timer timeline; + timeline.Start(); + sleep(3); + timeline.Pause(); + timeline.Resume(); +}