提交 1afa9492 编写于 作者: P peizhilin

Recover the profiler

上级 445fff24
...@@ -199,10 +199,13 @@ elseif(CMAKE_BUILD_TYPE STREQUAL "MinSizeRel") ...@@ -199,10 +199,13 @@ elseif(CMAKE_BUILD_TYPE STREQUAL "MinSizeRel")
list(APPEND CUDA_NVCC_FLAGS ${CMAKE_CXX_FLAGS_RELEASE}) list(APPEND CUDA_NVCC_FLAGS ${CMAKE_CXX_FLAGS_RELEASE})
endif() endif()
else(NOT WIN32) else(NOT WIN32)
list(APPEND CUDA_NVCC_FLAGS "--compiler-options;/bigobj")
if(CMAKE_BUILD_TYPE STREQUAL "Debug") if(CMAKE_BUILD_TYPE STREQUAL "Debug")
list(APPEND CUDA_NVCC_FLAGS "-g -G --compiler-options;/bigobj") list(APPEND CUDA_NVCC_FLAGS "-g -G")
# match the cl's _ITERATOR_DEBUG_LEVEL
list(APPEND CUDA_NVCC_FLAGS "-D_DEBUG")
elseif(CMAKE_BUILD_TYPE STREQUAL "Release") elseif(CMAKE_BUILD_TYPE STREQUAL "Release")
list(APPEND CUDA_NVCC_FLAGS "-O3 -DNDEBUG --compiler-options;/bigobj") list(APPEND CUDA_NVCC_FLAGS "-O3 -DNDEBUG")
else() else()
message(FATAL "Windows only support Release or Debug build now. Please set visual studio build type to Release/Debug, x64 build.") message(FATAL "Windows only support Release or Debug build now. Please set visual studio build type to Release/Debug, x64 build.")
endif() endif()
......
...@@ -26,10 +26,8 @@ limitations under the License. */ ...@@ -26,10 +26,8 @@ limitations under the License. */
#include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/memory/memory.h" #include "paddle/fluid/memory/memory.h"
#if !defined(_WIN32)
#include "paddle/fluid/recordio/scanner.h" #include "paddle/fluid/recordio/scanner.h"
#include "paddle/fluid/recordio/writer.h" #include "paddle/fluid/recordio/writer.h"
#endif // _WIN32
namespace paddle { namespace paddle {
namespace framework { namespace framework {
...@@ -305,7 +303,6 @@ void DeserializeFromStream(std::istream &is, LoDTensor *tensor, ...@@ -305,7 +303,6 @@ void DeserializeFromStream(std::istream &is, LoDTensor *tensor,
TensorFromStream(is, static_cast<Tensor *>(tensor), dev_ctx); TensorFromStream(is, static_cast<Tensor *>(tensor), dev_ctx);
} }
#if !defined(_WIN32)
void WriteToRecordIO(recordio::Writer *writer, void WriteToRecordIO(recordio::Writer *writer,
const std::vector<LoDTensor> &tensor, const std::vector<LoDTensor> &tensor,
const platform::DeviceContext &dev_ctx) { const platform::DeviceContext &dev_ctx) {
...@@ -335,19 +332,7 @@ bool ReadFromRecordIO(recordio::Scanner *scanner, ...@@ -335,19 +332,7 @@ bool ReadFromRecordIO(recordio::Scanner *scanner,
return true; return true;
} }
#else
class Writer {};
class Scanner {};
void WriteToRecordIO(recordio::Writer *writer,
const std::vector<LoDTensor> &tensor,
const platform::DeviceContext &dev_ctx) {}
bool ReadFromRecordIO(recordio::Scanner *scanner,
const platform::DeviceContext &dev_ctx,
std::vector<LoDTensor> *result_ptr) {
PADDLE_ENFORCE("windows didn't supported recordio!.");
return true;
}
#endif // _WIN32
std::vector<LoDTensor> LoDTensor::SplitLoDTensor( std::vector<LoDTensor> LoDTensor::SplitLoDTensor(
const std::vector<platform::Place> places) const { const std::vector<platform::Place> places) const {
check_memory_size(); check_memory_size();
......
...@@ -274,7 +274,6 @@ TEST(LoD, ConvertToOffsetBasedLoD) { ...@@ -274,7 +274,6 @@ TEST(LoD, ConvertToOffsetBasedLoD) {
EXPECT_EQ(offset_lod, expected); EXPECT_EQ(offset_lod, expected);
} }
#if !defined(_WIN32)
template <typename T> template <typename T>
static void TestRecordIO() { static void TestRecordIO() {
LoDTensor tensor; LoDTensor tensor;
...@@ -321,7 +320,6 @@ TEST(LoDTensor, RecordIO) { ...@@ -321,7 +320,6 @@ TEST(LoDTensor, RecordIO) {
TestRecordIO<float>(); TestRecordIO<float>();
TestRecordIO<double>(); TestRecordIO<double>();
} }
#endif // !defined(_WIN32)
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -153,17 +153,14 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) { ...@@ -153,17 +153,14 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
#endif #endif
} }
// The profiler has a process-wide mutex, which causes serious performance issues // The profiler has a process-wide mutex, which causes serious performance issues
// in concurrent scenarios. An `if` is used here to avoid this issue. // in concurrent scenarios. An `if` is used here to avoid this issue.
// Please do not remove the `if`; ask @Superjomn if there are any concerns. // Please do not remove the `if`; ask @Superjomn if there are any concerns.
#ifndef _WIN32
if (platform::IsProfileEnabled()) { if (platform::IsProfileEnabled()) {
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
platform::RecordEvent record_event(Type(), pool.Get(place)); platform::RecordEvent record_event(Type(), pool.Get(place));
RunImpl(scope, place); RunImpl(scope, place);
} else } else {
#endif
{
RunImpl(scope, place); RunImpl(scope, place);
} }
VLOG(30) << place << " " << DebugStringEx(&scope); VLOG(30) << place << " " << DebugStringEx(&scope);
......
...@@ -56,7 +56,6 @@ bool AnalysisPredictor::Init( ...@@ -56,7 +56,6 @@ bool AnalysisPredictor::Init(
const std::shared_ptr<framework::Scope> &parent_scope, const std::shared_ptr<framework::Scope> &parent_scope,
const std::shared_ptr<framework::ProgramDesc> &program) { const std::shared_ptr<framework::ProgramDesc> &program) {
VLOG(30) << "Predictor::init()"; VLOG(30) << "Predictor::init()";
#if !defined(_WIN32)
if (FLAGS_profile) { if (FLAGS_profile) {
LOG(WARNING) << "Profiler is actived, might affect the performance"; LOG(WARNING) << "Profiler is actived, might affect the performance";
LOG(INFO) << "You can turn off by set gflags '-profile false'"; LOG(INFO) << "You can turn off by set gflags '-profile false'";
...@@ -64,7 +63,6 @@ bool AnalysisPredictor::Init( ...@@ -64,7 +63,6 @@ bool AnalysisPredictor::Init(
: platform::ProfilerState::kCPU; : platform::ProfilerState::kCPU;
platform::EnableProfiler(tracking_device); platform::EnableProfiler(tracking_device);
} }
#endif
// no matter with or without MKLDNN // no matter with or without MKLDNN
paddle::platform::SetNumThreads(FLAGS_paddle_num_threads); paddle::platform::SetNumThreads(FLAGS_paddle_num_threads);
...@@ -501,12 +499,10 @@ bool AnalysisPredictor::LoadParameters() { ...@@ -501,12 +499,10 @@ bool AnalysisPredictor::LoadParameters() {
} }
AnalysisPredictor::~AnalysisPredictor() { AnalysisPredictor::~AnalysisPredictor() {
#if !defined(_WIN32)
if (FLAGS_profile) { if (FLAGS_profile) {
platform::DisableProfiler(platform::EventSortingKey::kTotal, platform::DisableProfiler(platform::EventSortingKey::kTotal,
"./profile.log"); "./profile.log");
} }
#endif
if (sub_scope_) { if (sub_scope_) {
scope_->DeleteScope(sub_scope_); scope_->DeleteScope(sub_scope_);
} }
......
...@@ -64,7 +64,6 @@ void NativePaddlePredictor::PrepareFeedFetch() { ...@@ -64,7 +64,6 @@ void NativePaddlePredictor::PrepareFeedFetch() {
bool NativePaddlePredictor::Init( bool NativePaddlePredictor::Init(
std::shared_ptr<framework::Scope> parent_scope) { std::shared_ptr<framework::Scope> parent_scope) {
VLOG(3) << "Predictor::init()"; VLOG(3) << "Predictor::init()";
#if !defined(_WIN32)
if (FLAGS_profile) { if (FLAGS_profile) {
LOG(WARNING) << "Profiler is actived, might affect the performance"; LOG(WARNING) << "Profiler is actived, might affect the performance";
LOG(INFO) << "You can turn off by set gflags '-profile false'"; LOG(INFO) << "You can turn off by set gflags '-profile false'";
...@@ -73,7 +72,6 @@ bool NativePaddlePredictor::Init( ...@@ -73,7 +72,6 @@ bool NativePaddlePredictor::Init(
: platform::ProfilerState::kCPU; : platform::ProfilerState::kCPU;
platform::EnableProfiler(tracking_device); platform::EnableProfiler(tracking_device);
} }
#endif
// no matter with or without MKLDNN // no matter with or without MKLDNN
paddle::platform::SetNumThreads(FLAGS_paddle_num_threads); paddle::platform::SetNumThreads(FLAGS_paddle_num_threads);
...@@ -121,12 +119,10 @@ bool NativePaddlePredictor::Init( ...@@ -121,12 +119,10 @@ bool NativePaddlePredictor::Init(
} }
NativePaddlePredictor::~NativePaddlePredictor() { NativePaddlePredictor::~NativePaddlePredictor() {
#if !defined(_WIN32)
if (FLAGS_profile) { if (FLAGS_profile) {
platform::DisableProfiler(platform::EventSortingKey::kTotal, platform::DisableProfiler(platform::EventSortingKey::kTotal,
"./profile.log"); "./profile.log");
} }
#endif
if (sub_scope_) { if (sub_scope_) {
scope_->DeleteScope(sub_scope_); scope_->DeleteScope(sub_scope_);
} }
......
...@@ -177,11 +177,9 @@ void TestOneThreadPrediction( ...@@ -177,11 +177,9 @@ void TestOneThreadPrediction(
warmup_timer.tic(); warmup_timer.tic();
predictor->Run(inputs[0], outputs, batch_size); predictor->Run(inputs[0], outputs, batch_size);
PrintTime(batch_size, 1, 1, 0, warmup_timer.toc(), 1); PrintTime(batch_size, 1, 1, 0, warmup_timer.toc(), 1);
#if !defined(_WIN32)
if (FLAGS_profile) { if (FLAGS_profile) {
paddle::platform::ResetProfiler(); paddle::platform::ResetProfiler();
} }
#endif
} }
LOG(INFO) << "Run " << num_times << " times..."; LOG(INFO) << "Run " << num_times << " times...";
...@@ -230,11 +228,9 @@ void TestMultiThreadPrediction( ...@@ -230,11 +228,9 @@ void TestMultiThreadPrediction(
warmup_timer.tic(); warmup_timer.tic();
predictor->Run(inputs[0], outputs, batch_size); predictor->Run(inputs[0], outputs, batch_size);
PrintTime(batch_size, 1, num_threads, tid, warmup_timer.toc(), 1); PrintTime(batch_size, 1, num_threads, tid, warmup_timer.toc(), 1);
#if !defined(_WIN32)
if (FLAGS_profile) { if (FLAGS_profile) {
paddle::platform::ResetProfiler(); paddle::platform::ResetProfiler();
} }
#endif
} }
LOG(INFO) << "Thread " << tid << " run " << num_times << " times..."; LOG(INFO) << "Thread " << tid << " run " << num_times << " times...";
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册