Unverified commit 43843192, authored by Yuanle Liu, committed by GitHub

rename WITH_INFERENCE_NVTX to WITH_NVTX and fix compile bug (#55219)

* fix WITH_SHARED_IR option type

* rename WITH_INFERENCE_NVTX to WITH_NVTX and fix compile bug

* update
Parent 2fc429f1
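Context for the diff below: `WITH_NVTX` only injects the `PADDLE_WITH_NVTX` compile definition on non-Windows builds, so every NVTX call site must be guarded. A minimal sketch of that guard pattern, assuming the plain NVTX v2 C API from the CUDA toolkit (the `PROFILE_RANGE_*` macro names are illustrative, not from this patch):

#ifdef PADDLE_WITH_NVTX
#include <nvToolsExt.h>  // NVTX v2 C API shipped with CUDA
// Built with -DPADDLE_WITH_NVTX: the markers emit real NVTX ranges.
#define PROFILE_RANGE_PUSH(name) nvtxRangePushA(name)
#define PROFILE_RANGE_POP() nvtxRangePop()
#else
// Built without the flag: the markers compile away to nothing.
#define PROFILE_RANGE_PUSH(name) ((void)0)
#define PROFILE_RANGE_POP() ((void)0)
#endif

void Step() {
  PROFILE_RANGE_PUSH("step");  // named range, visible in Nsight Systems
  // ... work to be profiled ...
  PROFILE_RANGE_POP();
}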
@@ -270,7 +270,7 @@ option(WITH_PSCORE "Compile with parameter server support" ${WITH_DISTRIBUTE})
 option(WITH_HETERPS "Compile with heterps" OFF)
 option(WITH_INFERENCE_API_TEST
        "Test fluid inference C++ high-level api interface" OFF)
-option(WITH_INFERENCE_NVTX "Paddle inference with nvtx for profiler" OFF)
+option(WITH_NVTX "Paddle with nvtx for profiler" OFF)
 option(PY_VERSION "Compile PaddlePaddle with python3 support" ${PY_VERSION})
 option(WITH_DGC "Use DGC(Deep Gradient Compression) or not" ${WITH_DISTRIBUTE})
 option(
@@ -661,6 +661,10 @@ if(WITH_MIPS)
   add_definitions(-DPADDLE_WITH_MIPS)
 endif()
+if(WITH_NVTX AND NOT WIN32)
+  add_definitions(-DPADDLE_WITH_NVTX)
+endif()
 if(WITH_LOONGARCH)
   set(WITH_XBYAK
       OFF
@@ -372,10 +372,6 @@ else()
   )
 endif()
-if(WITH_INFERENCE_NVTX AND NOT WIN32)
-  add_definitions(-DPADDLE_WITH_INFERENCE_NVTX)
-endif()
 copy(
   inference_lib_dist
   SRCS ${src_dir}/inference/capi_exp/pd_*.h ${paddle_inference_c_lib}
@@ -664,37 +664,31 @@ cc_library(
   SRCS variable_helper.cc
   DEPS lod_tensor)
+set(NAIVE_EXECUTOR_DEPS
+    op_registry
+    denormal
+    device_context
+    scope
+    framework_proto
+    glog
+    lod_rank_table
+    feed_fetch_method
+    graph_to_program_pass
+    variable_helper)
 if(TENSORRT_FOUND)
-  cc_library(
-    naive_executor
-    SRCS naive_executor.cc
-    DEPS op_registry
-         denormal
-         device_context
-         scope
-         framework_proto
-         glog
-         lod_rank_table
-         feed_fetch_method
-         graph_to_program_pass
-         variable_helper
-         tensorrt_engine_op)
-else()
-  cc_library(
-    naive_executor
-    SRCS naive_executor.cc
-    DEPS op_registry
-         denormal
-         device_context
-         scope
-         framework_proto
-         glog
-         lod_rank_table
-         feed_fetch_method
-         graph_to_program_pass
-         variable_helper)
+  set(NAIVE_EXECUTOR_DEPS ${NAIVE_EXECUTOR_DEPS} tensorrt_engine_op)
 endif()
+if(WITH_NVTX AND NOT WIN32)
+  set(NAIVE_EXECUTOR_DEPS ${NAIVE_EXECUTOR_DEPS} cuda_profiler)
+endif()
+cc_library(
+  naive_executor
+  SRCS naive_executor.cc
+  DEPS ${NAIVE_EXECUTOR_DEPS})
 cc_library(
   executor_gc_helper
   SRCS executor_gc_helper.cc
@@ -28,7 +28,7 @@
 #ifdef PADDLE_WITH_TENSORRT
 #include "paddle/fluid/operators/tensorrt/tensorrt_engine_op.h"
 #endif
-#ifdef PADDLE_WITH_INFERENCE_NVTX
+#ifdef PADDLE_WITH_NVTX
 #include "paddle/fluid/platform/device/gpu/cuda/cuda_profiler.h"
 #endif
 #ifdef PADDLE_WITH_LITE
@@ -57,14 +57,14 @@ void NaiveExecutor::Run() {
   platform::RegisterModelLayout(ops_, place_);
 #endif
   platform::ScopedFlushDenormal flush;
-#ifdef PADDLE_WITH_INFERENCE_NVTX
+#ifdef PADDLE_WITH_NVTX
   platform::CudaNvtxRangePush("model", platform::NvtxRangeColor::Yellow);
 #endif
   for (auto &op : ops_) {
     VLOG(4) << std::this_thread::get_id() << " run "
             << op->DebugStringEx(scope_) << " on scope " << scope_;
     op->SetIsCalledByExecutor(false);
-#ifdef PADDLE_WITH_INFERENCE_NVTX
+#ifdef PADDLE_WITH_NVTX
     platform::CudaNvtxRangePush(op->Type() + "|" + op->OutputVars(true).front(),
                                 platform::NvtxRangeColor::Green);
 #endif
@@ -105,14 +105,14 @@ void NaiveExecutor::Run() {
       }
     }
-#ifdef PADDLE_WITH_INFERENCE_NVTX
+#ifdef PADDLE_WITH_NVTX
     platform::CudaNvtxRangePop();
 #endif
     for (auto &func : output_hookfuncs_) {
       func(op.get(), scope_);
     }
   }
-#ifdef PADDLE_WITH_INFERENCE_NVTX
+#ifdef PADDLE_WITH_NVTX
   platform::CudaNvtxRangePop();
 #endif
 }
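The `platform::CudaNvtxRangePush`/`CudaNvtxRangePop` helpers used above come from the `cuda_profiler.h` header included earlier in the same file. As a hedged sketch only (an assumption about what that header plausibly wraps, not its verbatim contents; Paddle may route these through its dynload layer), they map onto the NVTX C API roughly like this:

#include <nvToolsExt.h>
#include <cstdint>
#include <string>

namespace platform {

// ARGB colors matching the call sites above; the exact values are assumed.
enum class NvtxRangeColor : uint32_t {
  Green = 0xFF00FF00,
  Yellow = 0xFFFFFF00,
};

inline void CudaNvtxRangePush(const std::string &name, NvtxRangeColor color) {
  nvtxEventAttributes_t attr = {};
  attr.version = NVTX_VERSION;
  attr.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
  attr.colorType = NVTX_COLOR_ARGB;
  attr.color = static_cast<uint32_t>(color);
  attr.messageType = NVTX_MESSAGE_TYPE_ASCII;
  attr.message.ascii = name.c_str();
  nvtxRangePushEx(&attr);  // open a named, colored range on this thread
}

inline void CudaNvtxRangePop() { nvtxRangePop(); }  // close innermost range

}  // namespace platform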
@@ -118,10 +118,6 @@ if(WITH_PSCORE)
   set(SHARED_INFERENCE_DEPS ${SHARED_INFERENCE_DEPS} fleet ps_service)
 endif()
-if(WITH_INFERENCE_NVTX AND NOT WIN32)
-  set(SHARED_INFERENCE_DEPS ${SHARED_INFERENCE_DEPS} cuda_profiler)
-endif()
 if(WITH_ONNXRUNTIME)
   set(SHARED_INFERENCE_SRCS
       ${SHARED_INFERENCE_SRCS}
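Note the dependency move across the two CMake hunks: `cuda_profiler` is dropped from `SHARED_INFERENCE_DEPS` here and instead added to `NAIVE_EXECUTOR_DEPS` in the framework hunk, so it links wherever `naive_executor` does. A toy driver (hypothetical, reusing the wrapper sketched above) that reproduces the model/op range nesting `NaiveExecutor::Run()` now emits:

#include <string>
#include <vector>

void RunOps(const std::vector<std::string> &op_names) {
#ifdef PADDLE_WITH_NVTX
  platform::CudaNvtxRangePush("model", platform::NvtxRangeColor::Yellow);
#endif
  for (const auto &name : op_names) {
#ifdef PADDLE_WITH_NVTX
    platform::CudaNvtxRangePush(name, platform::NvtxRangeColor::Green);
#endif
    // ... execute one op; its range nests inside the "model" range ...
#ifdef PADDLE_WITH_NVTX
    platform::CudaNvtxRangePop();  // close the per-op range
#endif
  }
#ifdef PADDLE_WITH_NVTX
  platform::CudaNvtxRangePop();  // close the outer "model" range
#endif
}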