From 1afa9492af7b76d15d38bc2f49d28069832c2cc9 Mon Sep 17 00:00:00 2001 From: peizhilin Date: Mon, 26 Nov 2018 11:33:12 +0800 Subject: [PATCH] Recover the profiler --- cmake/cuda.cmake | 7 +++++-- paddle/fluid/framework/lod_tensor.cc | 17 +---------------- paddle/fluid/framework/lod_tensor_test.cc | 2 -- paddle/fluid/framework/operator.cc | 11 ++++------- .../fluid/inference/api/analysis_predictor.cc | 4 ---- paddle/fluid/inference/api/api_impl.cc | 4 ---- .../fluid/inference/tests/api/tester_helper.h | 4 ---- 7 files changed, 10 insertions(+), 39 deletions(-) diff --git a/cmake/cuda.cmake b/cmake/cuda.cmake index 4c7e0fd3f..414e92eb2 100644 --- a/cmake/cuda.cmake +++ b/cmake/cuda.cmake @@ -199,10 +199,13 @@ elseif(CMAKE_BUILD_TYPE STREQUAL "MinSizeRel") list(APPEND CUDA_NVCC_FLAGS ${CMAKE_CXX_FLAGS_RELEASE}) endif() else(NOT WIN32) +list(APPEND CUDA_NVCC_FLAGS "--compiler-options;/bigobj") if(CMAKE_BUILD_TYPE STREQUAL "Debug") - list(APPEND CUDA_NVCC_FLAGS "-g -G --compiler-options;/bigobj") + list(APPEND CUDA_NVCC_FLAGS "-g -G") + # match the cl's _ITERATOR_DEBUG_LEVEL + list(APPEND CUDA_NVCC_FLAGS "-D_DEBUG") elseif(CMAKE_BUILD_TYPE STREQUAL "Release") - list(APPEND CUDA_NVCC_FLAGS "-O3 -DNDEBUG --compiler-options;/bigobj") + list(APPEND CUDA_NVCC_FLAGS "-O3 -DNDEBUG") else() message(FATAL "Windows only support Release or Debug build now. Please set visual studio build type to Release/Debug, x64 build.") endif() diff --git a/paddle/fluid/framework/lod_tensor.cc b/paddle/fluid/framework/lod_tensor.cc index 669d08c70..9b2eeaf59 100644 --- a/paddle/fluid/framework/lod_tensor.cc +++ b/paddle/fluid/framework/lod_tensor.cc @@ -26,10 +26,8 @@ limitations under the License. */ #include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/memory/memory.h" -#if !defined(_WIN32) #include "paddle/fluid/recordio/scanner.h" #include "paddle/fluid/recordio/writer.h" -#endif // _WIN32 namespace paddle { namespace framework { @@ -305,7 +303,6 @@ void DeserializeFromStream(std::istream &is, LoDTensor *tensor, TensorFromStream(is, static_cast(tensor), dev_ctx); } -#if !defined(_WIN32) void WriteToRecordIO(recordio::Writer *writer, const std::vector &tensor, const platform::DeviceContext &dev_ctx) { @@ -335,19 +332,7 @@ bool ReadFromRecordIO(recordio::Scanner *scanner, return true; } -#else -class Writer {}; -class Scanner {}; -void WriteToRecordIO(recordio::Writer *writer, - const std::vector &tensor, - const platform::DeviceContext &dev_ctx) {} -bool ReadFromRecordIO(recordio::Scanner *scanner, - const platform::DeviceContext &dev_ctx, - std::vector *result_ptr) { - PADDLE_ENFORCE("windows didn't supported recordio!."); - return true; -} -#endif // _WIN32 + std::vector LoDTensor::SplitLoDTensor( const std::vector places) const { check_memory_size(); diff --git a/paddle/fluid/framework/lod_tensor_test.cc b/paddle/fluid/framework/lod_tensor_test.cc index cbf5fd04d..cd50aaa26 100644 --- a/paddle/fluid/framework/lod_tensor_test.cc +++ b/paddle/fluid/framework/lod_tensor_test.cc @@ -274,7 +274,6 @@ TEST(LoD, ConvertToOffsetBasedLoD) { EXPECT_EQ(offset_lod, expected); } -#if !defined(_WIN32) template static void TestRecordIO() { LoDTensor tensor; @@ -321,7 +320,6 @@ TEST(LoDTensor, RecordIO) { TestRecordIO(); TestRecordIO(); } -#endif // !defined(_WIN32) } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index 1ec170b6f..60fe83860 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -153,17 +153,14 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) { #endif } -// The profile has a process-wide mutex, results in serious performance issue -// in concurrency scenerio. Here use an `if` to fix this issue. -// Please not remove the `if`, ask @Superjomn if there are any concern. -#ifndef _WIN32 + // The profile has a process-wide mutex, results in serious performance issue + // in concurrency scenerio. Here use an `if` to fix this issue. + // Please not remove the `if`, ask @Superjomn if there are any concern. if (platform::IsProfileEnabled()) { platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); platform::RecordEvent record_event(Type(), pool.Get(place)); RunImpl(scope, place); - } else -#endif - { + } else { RunImpl(scope, place); } VLOG(30) << place << " " << DebugStringEx(&scope); diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index cb14d2a26..8b9f87253 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -56,7 +56,6 @@ bool AnalysisPredictor::Init( const std::shared_ptr &parent_scope, const std::shared_ptr &program) { VLOG(30) << "Predictor::init()"; -#if !defined(_WIN32) if (FLAGS_profile) { LOG(WARNING) << "Profiler is actived, might affect the performance"; LOG(INFO) << "You can turn off by set gflags '-profile false'"; @@ -64,7 +63,6 @@ bool AnalysisPredictor::Init( : platform::ProfilerState::kCPU; platform::EnableProfiler(tracking_device); } -#endif // no matter with or without MKLDNN paddle::platform::SetNumThreads(FLAGS_paddle_num_threads); @@ -501,12 +499,10 @@ bool AnalysisPredictor::LoadParameters() { } AnalysisPredictor::~AnalysisPredictor() { -#if !defined(_WIN32) if (FLAGS_profile) { platform::DisableProfiler(platform::EventSortingKey::kTotal, "./profile.log"); } -#endif if (sub_scope_) { scope_->DeleteScope(sub_scope_); } diff --git a/paddle/fluid/inference/api/api_impl.cc b/paddle/fluid/inference/api/api_impl.cc index fcbc3803d..d80d8097f 100644 --- a/paddle/fluid/inference/api/api_impl.cc +++ b/paddle/fluid/inference/api/api_impl.cc @@ -64,7 +64,6 @@ void NativePaddlePredictor::PrepareFeedFetch() { bool NativePaddlePredictor::Init( std::shared_ptr parent_scope) { VLOG(3) << "Predictor::init()"; -#if !defined(_WIN32) if (FLAGS_profile) { LOG(WARNING) << "Profiler is actived, might affect the performance"; LOG(INFO) << "You can turn off by set gflags '-profile false'"; @@ -73,7 +72,6 @@ bool NativePaddlePredictor::Init( : platform::ProfilerState::kCPU; platform::EnableProfiler(tracking_device); } -#endif // no matter with or without MKLDNN paddle::platform::SetNumThreads(FLAGS_paddle_num_threads); @@ -121,12 +119,10 @@ bool NativePaddlePredictor::Init( } NativePaddlePredictor::~NativePaddlePredictor() { -#if !defined(_WIN32) if (FLAGS_profile) { platform::DisableProfiler(platform::EventSortingKey::kTotal, "./profile.log"); } -#endif if (sub_scope_) { scope_->DeleteScope(sub_scope_); } diff --git a/paddle/fluid/inference/tests/api/tester_helper.h b/paddle/fluid/inference/tests/api/tester_helper.h index 7b686045a..23507cf9a 100644 --- a/paddle/fluid/inference/tests/api/tester_helper.h +++ b/paddle/fluid/inference/tests/api/tester_helper.h @@ -177,11 +177,9 @@ void TestOneThreadPrediction( warmup_timer.tic(); predictor->Run(inputs[0], outputs, batch_size); PrintTime(batch_size, 1, 1, 0, warmup_timer.toc(), 1); -#if !defined(_WIN32) if (FLAGS_profile) { paddle::platform::ResetProfiler(); } -#endif } LOG(INFO) << "Run " << num_times << " times..."; @@ -230,11 +228,9 @@ void TestMultiThreadPrediction( warmup_timer.tic(); predictor->Run(inputs[0], outputs, batch_size); PrintTime(batch_size, 1, num_threads, tid, warmup_timer.toc(), 1); -#if !defined(_WIN32) if (FLAGS_profile) { paddle::platform::ResetProfiler(); } -#endif } LOG(INFO) << "Thread " << tid << " run " << num_times << " times..."; -- GitLab