diff --git a/paddle/fluid/inference/CMakeLists.txt b/paddle/fluid/inference/CMakeLists.txt
index b8f60d252137fbe611fbaeb9bd9e746f04244344..de23d3e477ecf1810df19d6c1c54767003ce6775 100644
--- a/paddle/fluid/inference/CMakeLists.txt
+++ b/paddle/fluid/inference/CMakeLists.txt
@@ -154,14 +154,8 @@ set_target_properties(paddle_inference_shared PROPERTIES OUTPUT_NAME
                                                          paddle_inference)
 if(NOT APPLE AND NOT WIN32)
   # TODO(liuyiqun): Temporarily disable the link flag because it is not support on Mac.
-  if(WITH_CUSTOM_DEVICE)
-    set(LINK_FLAGS
-        "-Wl,--version-script ${CMAKE_CURRENT_SOURCE_DIR}/paddle_inference_custom_device.map"
-    )
-  else()
-    set(LINK_FLAGS
-        "-Wl,--version-script ${CMAKE_CURRENT_SOURCE_DIR}/paddle_inference.map")
-  endif()
+  set(LINK_FLAGS
+      "-Wl,--version-script ${CMAKE_CURRENT_SOURCE_DIR}/paddle_inference.map")
   set_target_properties(paddle_inference_shared PROPERTIES LINK_FLAGS
                                                            "${LINK_FLAGS}")
   # check symbol hidden
diff --git a/paddle/fluid/inference/api/details/zero_copy_tensor.cc b/paddle/fluid/inference/api/details/zero_copy_tensor.cc
index 59b44769ddd38b073947a4096b498a20f17e25d1..80bde735363d1e0bb16bb458b43f6ce3b5d623fc 100644
--- a/paddle/fluid/inference/api/details/zero_copy_tensor.cc
+++ b/paddle/fluid/inference/api/details/zero_copy_tensor.cc
@@ -378,17 +378,16 @@ void Tensor::CopyToCpuImpl(T *data,
   auto *t_data = tensor->data<T>();
   auto t_place = tensor->place();
 
-  phi::DenseTensor out;
-  auto mem_allocation =
-      std::make_shared<paddle::memory::allocation::Allocation>(
-          static_cast<void *>(data),
-          ele_num * sizeof(T),
-          paddle::platform::CPUPlace());
-  out.ResetHolder(mem_allocation);
-
   if (paddle::platform::is_cpu_place(t_place)) {
 #ifdef PADDLE_WITH_MKLDNN
-    if (tensor->layout() == phi::DataLayout::ONEDNN)
+    if (tensor->layout() == phi::DataLayout::ONEDNN) {
+      phi::DenseTensor out;
+      auto mem_allocation =
+          std::make_shared<paddle::memory::allocation::Allocation>(
+              static_cast<void *>(data),
+              ele_num * sizeof(T),
+              paddle::platform::CPUPlace());
+      out.ResetHolder(mem_allocation);
       phi::funcs::TransDataLayoutFromOneDNN(
           tensor->layout(),
           phi::OneDNNContext::tls().get_cur_paddle_data_layout(),
@@ -396,8 +395,9 @@ void Tensor::CopyToCpuImpl(T *data,
           &out,
           paddle::platform::CPUPlace(),
           true);
-    else
+    } else {
       std::memcpy(static_cast<void *>(data), t_data, ele_num * sizeof(T));
+    }
 #else
     std::memcpy(static_cast<void *>(data), t_data, ele_num * sizeof(T));
 #endif
@@ -871,17 +871,16 @@ void InternalUtils::CopyToCpuWithIoStream(paddle_infer::Tensor *t,
   auto *t_data = tensor->data<T>();
   auto t_place = tensor->place();
 
-  phi::DenseTensor out;
-  auto mem_allocation =
-      std::make_shared<paddle::memory::allocation::Allocation>(
-          static_cast<void *>(data),
-          ele_num * sizeof(T),
-          paddle::platform::CPUPlace());
-  out.ResetHolder(mem_allocation);
-
   if (paddle::platform::is_cpu_place(t_place)) {
 #ifdef PADDLE_WITH_MKLDNN
-    if (tensor->layout() == phi::DataLayout::ONEDNN)
+    if (tensor->layout() == phi::DataLayout::ONEDNN) {
+      phi::DenseTensor out;
+      auto mem_allocation =
+          std::make_shared<paddle::memory::allocation::Allocation>(
+              static_cast<void *>(data),
+              ele_num * sizeof(T),
+              paddle::platform::CPUPlace());
+      out.ResetHolder(mem_allocation);
       phi::funcs::TransDataLayoutFromOneDNN(
           tensor->layout(),
           phi::OneDNNContext::tls().get_cur_paddle_data_layout(),
@@ -889,8 +888,9 @@ void InternalUtils::CopyToCpuWithIoStream(paddle_infer::Tensor *t,
           &out,
           paddle::platform::CPUPlace(),
           true);
-    else
+    } else {
       std::memcpy(static_cast<void *>(data), t_data, ele_num * sizeof(T));
+    }
 #else
     std::memcpy(static_cast<void *>(data), t_data, ele_num * sizeof(T));
 #endif
diff --git a/paddle/fluid/inference/paddle_inference.map b/paddle/fluid/inference/paddle_inference.map
index b29235b5d3e1c68b4ce90613784ef57b166d7247..a928401490b8621a4bbed9b9ee3227d9083307c2 100644
--- a/paddle/fluid/inference/paddle_inference.map
+++ b/paddle/fluid/inference/paddle_inference.map
@@ -73,6 +73,7 @@
 		*Pass*;
 		*profile*;
 		*phi*;
+		PD_*;
 		*cinn*;
 	local:
 		*;
diff --git a/paddle/fluid/inference/paddle_inference_custom_device.map b/paddle/fluid/inference/paddle_inference_custom_device.map
deleted file mode 100644
index 06fdeaac83832e6498cd478a1ce7ef53f50e2df1..0000000000000000000000000000000000000000
--- a/paddle/fluid/inference/paddle_inference_custom_device.map
+++ /dev/null
@@ -1,81 +0,0 @@
-{
-	global:
-		extern "C++" {
-			*paddle_infer::GetVersion*;
-			*paddle_infer::UpdateDllFlag*;
-			*paddle_infer::experimental::InternalUtils*;
-			*paddle_infer::Tensor*;
-			*paddle_infer::Predictor*;
-			*paddle_infer::CreatePredictor*;
-			*paddle_infer::GetTrtCompileVersion*;
-			*paddle_infer::GetTrtRuntimeVersion*;
-			*paddle_infer::GetNumBytesOfDataType*;
-			*paddle_infer::ConvertToMixedPrecision*;
-			*paddle_infer::contrib::TensorUtils*;
-			*paddle_infer::contrib::Status*;
-			*paddle_infer::services::PredictorPool*;
-			*paddle_infer::LayoutConvert*;
-
-			*paddle::experimental*;
-			*paddle::internal*;
-			*paddle::get_version*;
-			*paddle::LiteNNAdapterConfig*;
-			*paddle::AnalysisConfig::*;
-			*paddle::PaddlePredictor::*;
-			*paddle::CreatePaddlePredictor*;
-			*paddle::NativePaddlePredictor*;
-			*paddle::AnalysisPredictor*;
-			*paddle::PaddleDtypeSize*;
-			*paddle::ZeroCopyTensor*;
-			*paddle::*Strategy*;
-			*paddle::NativeConfig*;
-			*paddle::PaddleBuf*;
-			*paddle::PaddleTensor*;
-			*paddle::UpdateDllFlag*;
-			*paddle::MakeCipher*;
-			*paddle::DistConfig*;
-			*paddle::DefaultGPUPlace*;
-			*paddle::ResourceManager*;
-			*paddle::GPUContextResource*;
-			*paddle::CPUContextResource*;
-			*paddle::OpMetaInfoBuilder*;
-			*paddle::CustomOpKernelContext*;
-
-			/* ut needs the following symbol, we need to modify all the ut to hidden such symbols */
-
-			/* Another question: the ut size will grow from 50M to 80M, why? */
-
-			*paddle::detail*;
-			*paddle::imperative*;
-			*paddle::detailv3*;
-			*paddle::memory*;
-			*paddle::string*;
-			*paddle::operators*;
-
-			*paddle::distributed*;
-			/* *paddle::distributed::FleetWrapper*; */
-			/* *paddle::distributed::TensorTable*;  */
-			/* *paddle::distributed::TableManager*; */
-
-			/* *paddle::inference*; */
-			*paddle::inference::ReadBinaryFile*;
-
-			*paddle::platform*;
-			/* *paddle::platform::GetExportedFlagInfoMap*; */
-
-			/* *paddle::framework*; */
-			*paddle::framework::InterpreterCore*;
-			*paddle::framework::Executor*;
-			*paddle::framework::proto*;
-		};
-
-		/* The following symbols need to reconsider. */
-		*Pass*;
-		*profile*;
-		*phi*;
-		*FLAGS_*;
-		PD_*;
-		*cinn*;
-	local:
-		*;
-};
diff --git a/paddle/phi/core/enforce.cc b/paddle/phi/core/enforce.cc
index 7d4efead494316b83c1813b563c637364e21895f..dc309a3185ca88448588bcb43c8626e30bc9672b 100644
--- a/paddle/phi/core/enforce.cc
+++ b/paddle/phi/core/enforce.cc
@@ -20,7 +20,8 @@ limitations under the License. */
 #include <vector>
 
 #include "paddle/utils/blank.h"
-#include "paddle/utils/variant.h"
+
+DECLARE_int32(call_stack_level);
 
 namespace egr {
 class EagerVariable;
@@ -88,6 +89,8 @@ using NameTensorMap = NameVarMap<egr::EagerVariable>;
 namespace phi {
 namespace enforce {
 
+int GetCallStackLevel() { return FLAGS_call_stack_level; }  // Returns the current value of FLAGS_call_stack_level; enforce.h calls this instead of reading the flag directly.
+
 template <typename T>
 static std::string ReplaceComplexTypeStr(std::string str,
                                          const std::string& type_name) {
diff --git a/paddle/phi/core/enforce.h b/paddle/phi/core/enforce.h
index e0dd918cef418824016fcce142534c735de6f8bb..31147356a5455f3b97df4075b2d58762fca2d048 100644
--- a/paddle/phi/core/enforce.h
+++ b/paddle/phi/core/enforce.h
@@ -101,8 +101,6 @@ limitations under the License. */
 
 #include "paddle/utils/variant.h"
 
-DECLARE_int32(call_stack_level);
-
 namespace phi {
 class ErrorSummary;
 }  // namespace phi
@@ -235,6 +233,7 @@ struct BinaryCompareMessageConverter<false> {
 };
 }  // namespace details
 
+int GetCallStackLevel();  // Returns FLAGS_call_stack_level (defined in enforce.cc); values > 1 select the full error string with C++ call stack.
 std::string GetCurrentTraceBackString(bool for_signal = false);
 std::string SimplifyErrorTypeFormat(const std::string& str);
 
@@ -243,7 +242,7 @@ static std::string GetErrorSumaryString(StrType&& what,
                                         const char* file,
                                         int line) {
   std::ostringstream sout;
-  if (FLAGS_call_stack_level > 1) {
+  if (GetCallStackLevel() > 1) {
     sout << "\n----------------------\nError Message "
             "Summary:\n----------------------\n";
   }
@@ -270,7 +269,7 @@ template <typename StrType>
 static std::string GetTraceBackString(StrType&& what,
                                       const char* file,
                                       int line) {
-  if (FLAGS_call_stack_level > 1) {
+  if (GetCallStackLevel() > 1) {
     // FLAGS_call_stack_level>1 means showing c++ call stack
     return GetCurrentTraceBackString() + GetErrorSumaryString(what, file, line);
   } else {
@@ -317,7 +316,7 @@ struct EnforceNotMet : public std::exception {
   }
 
   const char* what() const noexcept override {
-    if (FLAGS_call_stack_level > 1) {
+    if (GetCallStackLevel() > 1) {
       return err_str_.c_str();
     } else {
       return simple_err_str_.c_str();
@@ -331,7 +330,7 @@ struct EnforceNotMet : public std::exception {
   const std::string& simple_error_str() const { return simple_err_str_; }
 
   void set_error_str(std::string str) {
-    if (FLAGS_call_stack_level > 1) {
+    if (GetCallStackLevel() > 1) {
       err_str_ = str;
     } else {
       simple_err_str_ = str;