Unverified commit 43843192, authored by Yuanle Liu, committed by GitHub

rename WITH_INFERENCE_NVTX to WITH_NVTX and fix compile bug (#55219)

* fix WITH_SHARED_IR option type

* rename WITH_INFERENCE_NVTX to WITH_NVTX and fix compile bug

* update
Parent 2fc429f1
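Context for the diff below: `WITH_NVTX` only injects the `PADDLE_WITH_NVTX` compile definition on non-Windows builds, so every NVTX call site must be guarded. A minimal sketch of that guard pattern, assuming the plain NVTX v2 C API from the CUDA toolkit (the `PROFILE_RANGE_*` macro names are illustrative, not from this patch):

#ifdef PADDLE_WITH_NVTX
#include <nvToolsExt.h>  // NVTX v2 C API shipped with CUDA
// Built with -DPADDLE_WITH_NVTX: the markers emit real NVTX ranges.
#define PROFILE_RANGE_PUSH(name) nvtxRangePushA(name)
#define PROFILE_RANGE_POP() nvtxRangePop()
#else
// Built without the flag: the markers compile away to nothing.
#define PROFILE_RANGE_PUSH(name) ((void)0)
#define PROFILE_RANGE_POP() ((void)0)
#endif

void Step() {
  PROFILE_RANGE_PUSH("step");  // named range, visible in Nsight Systems
  // ... work to be profiled ...
  PROFILE_RANGE_POP();
}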
@@ -270,7 +270,7 @@ option(WITH_PSCORE "Compile with parameter server support" ${WITH_DISTRIBUTE})
 option(WITH_HETERPS "Compile with heterps" OFF)
 option(WITH_INFERENCE_API_TEST
        "Test fluid inference C++ high-level api interface" OFF)
-option(WITH_INFERENCE_NVTX "Paddle inference with nvtx for profiler" OFF)
+option(WITH_NVTX "Paddle with nvtx for profiler" OFF)
 option(PY_VERSION "Compile PaddlePaddle with python3 support" ${PY_VERSION})
 option(WITH_DGC "Use DGC(Deep Gradient Compression) or not" ${WITH_DISTRIBUTE})
 option(
@@ -661,6 +661,10 @@ if(WITH_MIPS)
   add_definitions(-DPADDLE_WITH_MIPS)
 endif()
+if(WITH_NVTX AND NOT WIN32)
+  add_definitions(-DPADDLE_WITH_NVTX)
+endif()
 if(WITH_LOONGARCH)
   set(WITH_XBYAK
       OFF
@@ -372,10 +372,6 @@ else()
   )
 endif()
-if(WITH_INFERENCE_NVTX AND NOT WIN32)
-  add_definitions(-DPADDLE_WITH_INFERENCE_NVTX)
-endif()
 copy(
   inference_lib_dist
   SRCS ${src_dir}/inference/capi_exp/pd_*.h ${paddle_inference_c_lib}
@@ -664,37 +664,31 @@ cc_library(
   SRCS variable_helper.cc
   DEPS lod_tensor)
+set(NAIVE_EXECUTOR_DEPS
+    op_registry
+    denormal
+    device_context
+    scope
+    framework_proto
+    glog
+    lod_rank_table
+    feed_fetch_method
+    graph_to_program_pass
+    variable_helper)
 if(TENSORRT_FOUND)
-  cc_library(
-    naive_executor
-    SRCS naive_executor.cc
-    DEPS op_registry
-         denormal
-         device_context
-         scope
-         framework_proto
-         glog
-         lod_rank_table
-         feed_fetch_method
-         graph_to_program_pass
-         variable_helper
-         tensorrt_engine_op)
-else()
-  cc_library(
-    naive_executor
-    SRCS naive_executor.cc
-    DEPS op_registry
-         denormal
-         device_context
-         scope
-         framework_proto
-         glog
-         lod_rank_table
-         feed_fetch_method
-         graph_to_program_pass
-         variable_helper)
+  set(NAIVE_EXECUTOR_DEPS ${NAIVE_EXECUTOR_DEPS} tensorrt_engine_op)
 endif()
+if(WITH_NVTX AND NOT WIN32)
+  set(NAIVE_EXECUTOR_DEPS ${NAIVE_EXECUTOR_DEPS} cuda_profiler)
+endif()
+cc_library(
+  naive_executor
+  SRCS naive_executor.cc
+  DEPS ${NAIVE_EXECUTOR_DEPS})
 cc_library(
   executor_gc_helper
   SRCS executor_gc_helper.cc
@@ -28,7 +28,7 @@
 #ifdef PADDLE_WITH_TENSORRT
 #include "paddle/fluid/operators/tensorrt/tensorrt_engine_op.h"
 #endif
-#ifdef PADDLE_WITH_INFERENCE_NVTX
+#ifdef PADDLE_WITH_NVTX
 #include "paddle/fluid/platform/device/gpu/cuda/cuda_profiler.h"
 #endif
 #ifdef PADDLE_WITH_LITE
@@ -57,14 +57,14 @@ void NaiveExecutor::Run() {
   platform::RegisterModelLayout(ops_, place_);
 #endif
   platform::ScopedFlushDenormal flush;
-#ifdef PADDLE_WITH_INFERENCE_NVTX
+#ifdef PADDLE_WITH_NVTX
   platform::CudaNvtxRangePush("model", platform::NvtxRangeColor::Yellow);
 #endif
   for (auto &op : ops_) {
     VLOG(4) << std::this_thread::get_id() << " run "
             << op->DebugStringEx(scope_) << " on scope " << scope_;
     op->SetIsCalledByExecutor(false);
-#ifdef PADDLE_WITH_INFERENCE_NVTX
+#ifdef PADDLE_WITH_NVTX
     platform::CudaNvtxRangePush(op->Type() + "|" + op->OutputVars(true).front(),
                                 platform::NvtxRangeColor::Green);
 #endif
@@ -105,14 +105,14 @@ void NaiveExecutor::Run() {
       }
     }
-#ifdef PADDLE_WITH_INFERENCE_NVTX
+#ifdef PADDLE_WITH_NVTX
     platform::CudaNvtxRangePop();
 #endif
     for (auto &func : output_hookfuncs_) {
       func(op.get(), scope_);
     }
   }
-#ifdef PADDLE_WITH_INFERENCE_NVTX
+#ifdef PADDLE_WITH_NVTX
   platform::CudaNvtxRangePop();
 #endif
 }
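The `platform::CudaNvtxRangePush`/`CudaNvtxRangePop` helpers used above come from the `cuda_profiler.h` header included earlier in the same file. As a hedged sketch only (an assumption about what that header plausibly wraps, not its verbatim contents; Paddle may route these through its dynload layer), they map onto the NVTX C API roughly like this:

#include <nvToolsExt.h>
#include <cstdint>
#include <string>

namespace platform {

// ARGB colors matching the call sites above; the exact values are assumed.
enum class NvtxRangeColor : uint32_t {
  Green = 0xFF00FF00,
  Yellow = 0xFFFFFF00,
};

inline void CudaNvtxRangePush(const std::string &name, NvtxRangeColor color) {
  nvtxEventAttributes_t attr = {};
  attr.version = NVTX_VERSION;
  attr.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
  attr.colorType = NVTX_COLOR_ARGB;
  attr.color = static_cast<uint32_t>(color);
  attr.messageType = NVTX_MESSAGE_TYPE_ASCII;
  attr.message.ascii = name.c_str();
  nvtxRangePushEx(&attr);  // open a named, colored range on this thread
}

inline void CudaNvtxRangePop() { nvtxRangePop(); }  // close innermost range

}  // namespace platform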
@@ -118,10 +118,6 @@ if(WITH_PSCORE)
   set(SHARED_INFERENCE_DEPS ${SHARED_INFERENCE_DEPS} fleet ps_service)
 endif()
-if(WITH_INFERENCE_NVTX AND NOT WIN32)
-  set(SHARED_INFERENCE_DEPS ${SHARED_INFERENCE_DEPS} cuda_profiler)
-endif()
 if(WITH_ONNXRUNTIME)
   set(SHARED_INFERENCE_SRCS
       ${SHARED_INFERENCE_SRCS}
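Note the dependency move across the two CMake hunks: `cuda_profiler` is dropped from `SHARED_INFERENCE_DEPS` here and instead added to `NAIVE_EXECUTOR_DEPS` in the framework hunk, so it links wherever `naive_executor` does. A toy driver (hypothetical, reusing the wrapper sketched above) that reproduces the model/op range nesting `NaiveExecutor::Run()` now emits:

#include <string>
#include <vector>

void RunOps(const std::vector<std::string> &op_names) {
#ifdef PADDLE_WITH_NVTX
  platform::CudaNvtxRangePush("model", platform::NvtxRangeColor::Yellow);
#endif
  for (const auto &name : op_names) {
#ifdef PADDLE_WITH_NVTX
    platform::CudaNvtxRangePush(name, platform::NvtxRangeColor::Green);
#endif
    // ... execute one op; its range nests inside the "model" range ...
#ifdef PADDLE_WITH_NVTX
    platform::CudaNvtxRangePop();  // close the per-op range
#endif
  }
#ifdef PADDLE_WITH_NVTX
  platform::CudaNvtxRangePop();  // close the outer "model" range
#endif
}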