提交 1afa9492 编写于 作者: P peizhilin

Recover the profiler

上级 445fff24
......@@ -199,10 +199,13 @@ elseif(CMAKE_BUILD_TYPE STREQUAL "MinSizeRel")
list(APPEND CUDA_NVCC_FLAGS ${CMAKE_CXX_FLAGS_RELEASE})
endif()
else(NOT WIN32)
list(APPEND CUDA_NVCC_FLAGS "--compiler-options;/bigobj")
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
list(APPEND CUDA_NVCC_FLAGS "-g -G --compiler-options;/bigobj")
list(APPEND CUDA_NVCC_FLAGS "-g -G")
# match the cl's _ITERATOR_DEBUG_LEVEL
list(APPEND CUDA_NVCC_FLAGS "-D_DEBUG")
elseif(CMAKE_BUILD_TYPE STREQUAL "Release")
list(APPEND CUDA_NVCC_FLAGS "-O3 -DNDEBUG --compiler-options;/bigobj")
list(APPEND CUDA_NVCC_FLAGS "-O3 -DNDEBUG")
else()
message(FATAL "Windows only support Release or Debug build now. Please set visual studio build type to Release/Debug, x64 build.")
endif()
......
......@@ -26,10 +26,8 @@ limitations under the License. */
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/memory/memory.h"
#if !defined(_WIN32)
#include "paddle/fluid/recordio/scanner.h"
#include "paddle/fluid/recordio/writer.h"
#endif // _WIN32
namespace paddle {
namespace framework {
......@@ -305,7 +303,6 @@ void DeserializeFromStream(std::istream &is, LoDTensor *tensor,
TensorFromStream(is, static_cast<Tensor *>(tensor), dev_ctx);
}
#if !defined(_WIN32)
void WriteToRecordIO(recordio::Writer *writer,
const std::vector<LoDTensor> &tensor,
const platform::DeviceContext &dev_ctx) {
......@@ -335,19 +332,7 @@ bool ReadFromRecordIO(recordio::Scanner *scanner,
return true;
}
#else
class Writer {};
class Scanner {};
void WriteToRecordIO(recordio::Writer *writer,
const std::vector<LoDTensor> &tensor,
const platform::DeviceContext &dev_ctx) {}
bool ReadFromRecordIO(recordio::Scanner *scanner,
const platform::DeviceContext &dev_ctx,
std::vector<LoDTensor> *result_ptr) {
PADDLE_ENFORCE("windows didn't supported recordio!.");
return true;
}
#endif // _WIN32
std::vector<LoDTensor> LoDTensor::SplitLoDTensor(
const std::vector<platform::Place> places) const {
check_memory_size();
......
......@@ -274,7 +274,6 @@ TEST(LoD, ConvertToOffsetBasedLoD) {
EXPECT_EQ(offset_lod, expected);
}
#if !defined(_WIN32)
template <typename T>
static void TestRecordIO() {
LoDTensor tensor;
......@@ -321,7 +320,6 @@ TEST(LoDTensor, RecordIO) {
TestRecordIO<float>();
TestRecordIO<double>();
}
#endif // !defined(_WIN32)
} // namespace framework
} // namespace paddle
......@@ -153,17 +153,14 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
#endif
}
// The profile has a process-wide mutex, results in serious performance issue
// in concurrency scenerio. Here use an `if` to fix this issue.
// Please not remove the `if`, ask @Superjomn if there are any concern.
#ifndef _WIN32
// The profile has a process-wide mutex, results in serious performance issue
// in concurrency scenerio. Here use an `if` to fix this issue.
// Please not remove the `if`, ask @Superjomn if there are any concern.
if (platform::IsProfileEnabled()) {
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
platform::RecordEvent record_event(Type(), pool.Get(place));
RunImpl(scope, place);
} else
#endif
{
} else {
RunImpl(scope, place);
}
VLOG(30) << place << " " << DebugStringEx(&scope);
......
......@@ -56,7 +56,6 @@ bool AnalysisPredictor::Init(
const std::shared_ptr<framework::Scope> &parent_scope,
const std::shared_ptr<framework::ProgramDesc> &program) {
VLOG(30) << "Predictor::init()";
#if !defined(_WIN32)
if (FLAGS_profile) {
LOG(WARNING) << "Profiler is actived, might affect the performance";
LOG(INFO) << "You can turn off by set gflags '-profile false'";
......@@ -64,7 +63,6 @@ bool AnalysisPredictor::Init(
: platform::ProfilerState::kCPU;
platform::EnableProfiler(tracking_device);
}
#endif
// no matter with or without MKLDNN
paddle::platform::SetNumThreads(FLAGS_paddle_num_threads);
......@@ -501,12 +499,10 @@ bool AnalysisPredictor::LoadParameters() {
}
AnalysisPredictor::~AnalysisPredictor() {
#if !defined(_WIN32)
if (FLAGS_profile) {
platform::DisableProfiler(platform::EventSortingKey::kTotal,
"./profile.log");
}
#endif
if (sub_scope_) {
scope_->DeleteScope(sub_scope_);
}
......
......@@ -64,7 +64,6 @@ void NativePaddlePredictor::PrepareFeedFetch() {
bool NativePaddlePredictor::Init(
std::shared_ptr<framework::Scope> parent_scope) {
VLOG(3) << "Predictor::init()";
#if !defined(_WIN32)
if (FLAGS_profile) {
LOG(WARNING) << "Profiler is actived, might affect the performance";
LOG(INFO) << "You can turn off by set gflags '-profile false'";
......@@ -73,7 +72,6 @@ bool NativePaddlePredictor::Init(
: platform::ProfilerState::kCPU;
platform::EnableProfiler(tracking_device);
}
#endif
// no matter with or without MKLDNN
paddle::platform::SetNumThreads(FLAGS_paddle_num_threads);
......@@ -121,12 +119,10 @@ bool NativePaddlePredictor::Init(
}
NativePaddlePredictor::~NativePaddlePredictor() {
#if !defined(_WIN32)
if (FLAGS_profile) {
platform::DisableProfiler(platform::EventSortingKey::kTotal,
"./profile.log");
}
#endif
if (sub_scope_) {
scope_->DeleteScope(sub_scope_);
}
......
......@@ -177,11 +177,9 @@ void TestOneThreadPrediction(
warmup_timer.tic();
predictor->Run(inputs[0], outputs, batch_size);
PrintTime(batch_size, 1, 1, 0, warmup_timer.toc(), 1);
#if !defined(_WIN32)
if (FLAGS_profile) {
paddle::platform::ResetProfiler();
}
#endif
}
LOG(INFO) << "Run " << num_times << " times...";
......@@ -230,11 +228,9 @@ void TestMultiThreadPrediction(
warmup_timer.tic();
predictor->Run(inputs[0], outputs, batch_size);
PrintTime(batch_size, 1, num_threads, tid, warmup_timer.toc(), 1);
#if !defined(_WIN32)
if (FLAGS_profile) {
paddle::platform::ResetProfiler();
}
#endif
}
LOG(INFO) << "Thread " << tid << " run " << num_times << " times...";
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册