[PHI decoupling] move "flags.h" from fluid to phi (#48696)

39ffef0d · PuQing · GitHub · dd57860d · 39ffef0d · 39ffef0d
36 changed files
--- a/paddle/fluid/distributed/store/tcp_store.cc
+++ b/paddle/fluid/distributed/store/tcp_store.cc
@@ -20,7 +20,7 @@
 #include "paddle/fluid/distributed/store/tcp_utils.h"
 #include "paddle/fluid/platform/enforce.h"
-#include "paddle/fluid/platform/flags.h"
+#include "paddle/phi/core/flags.h"
 namespace paddle {
 namespace distributed {

--- a/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cpu.cc
+++ b/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cpu.cc
@@ -25,7 +25,7 @@
 #include "paddle/fluid/eager/tests/performance_tests/benchmark_utils.h"
 #include "paddle/fluid/eager/tests/test_utils.h"
 #include "paddle/fluid/imperative/tracer.h"
-#include "paddle/fluid/platform/flags.h"
+#include "paddle/phi/core/flags.h"
 #ifdef WITH_GPERFTOOLS
 #include "gperftools/profiler.h"

--- a/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cuda.cc
+++ b/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cuda.cc
@@ -24,7 +24,7 @@
 #include "paddle/fluid/eager/tests/performance_tests/benchmark_utils.h"
 #include "paddle/fluid/eager/tests/test_utils.h"
 #include "paddle/fluid/imperative/tracer.h"
-#include "paddle/fluid/platform/flags.h"
+#include "paddle/phi/core/flags.h"
 #ifdef WITH_GPERFTOOLS
 #include "gperftools/profiler.h"

--- a/paddle/fluid/framework/new_executor/executor_statistics.cc
+++ b/paddle/fluid/framework/new_executor/executor_statistics.cc
@@ -24,9 +24,9 @@
 #include <vector>
 #include "glog/logging.h"
-#include "paddle/fluid/platform/flags.h"
 #include "paddle/fluid/platform/os_info.h"
 #include "paddle/fluid/platform/profiler/utils.h"
+#include "paddle/phi/core/flags.h"
 DECLARE_bool(use_stream_safe_cuda_allocator);
 PADDLE_DEFINE_EXPORTED_string(static_executor_perfstat_filepath,

--- a/paddle/fluid/imperative/flags.cc
+++ b/paddle/fluid/imperative/flags.cc
@@ -14,7 +14,7 @@
 #include "paddle/fluid/imperative/flags.h"
-#include "paddle/fluid/platform/flags.h"
+#include "paddle/phi/core/flags.h"
 PADDLE_DEFINE_EXPORTED_uint64(dygraph_debug,
                              0,

--- a/paddle/fluid/imperative/profiler.cc
+++ b/paddle/fluid/imperative/profiler.cc
@@ -21,7 +21,7 @@
 #include <mutex>  // NOLINT
-#include "paddle/fluid/platform/flags.h"
+#include "paddle/phi/core/flags.h"
 PADDLE_DEFINE_EXPORTED_string(
    tracer_profile_fname,

--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -1422,8 +1422,7 @@ CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
      }
      // support set flags from enviorment.
-      const platform::ExportedFlagInfoMap &env_map =
+      const phi::ExportedFlagInfoMap &env_map = phi::GetExportedFlagInfoMap();
-          platform::GetExportedFlagInfoMap();
      std::ostringstream os;
      os << "--tryfromenv=";
      for (auto &pair : env_map) {

--- a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc
+++ b/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc
@@ -18,8 +18,8 @@
 #include <mutex>  // NOLINT
 #include "paddle/fluid/memory/allocation/aligned_allocator.h"
-#include "paddle/fluid/platform/flags.h"
 #include "paddle/fluid/platform/profiler/event_tracing.h"
+#include "paddle/phi/core/flags.h"
 PADDLE_DEFINE_EXPORTED_READONLY_bool(
    free_idle_chunk,

--- a/paddle/fluid/operators/controlflow/conditional_block_op.cc
+++ b/paddle/fluid/operators/controlflow/conditional_block_op.cc
@@ -17,7 +17,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/new_executor/standalone_executor.h"
 #include "paddle/fluid/operators/assign_op.h"
 #include "paddle/fluid/operators/controlflow/control_flow_op_helper.h"
-#include "paddle/fluid/platform/flags.h"
+#include "paddle/phi/core/flags.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
 #ifdef PADDLE_WITH_MKLDNN

--- a/paddle/fluid/operators/conv_op.cc
+++ b/paddle/fluid/operators/conv_op.cc
@@ -25,7 +25,6 @@ limitations under the License. */
 #include "paddle/fluid/platform/mkldnn_helper.h"
 #endif
 #include "paddle/fluid/framework/infershape_utils.h"
-#include "paddle/fluid/platform/cudnn_workspace_helper.h"
 #include "paddle/phi/infermeta/binary.h"
 namespace paddle {

--- a/paddle/fluid/operators/conv_transpose_op.cc
+++ b/paddle/fluid/operators/conv_transpose_op.cc
@@ -21,7 +21,6 @@ limitations under the License. */
 #include "paddle/fluid/framework/infershape_utils.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/platform/cudnn_workspace_helper.h"
 #include "paddle/phi/core/infermeta_utils.h"
 #include "paddle/phi/infermeta/backward.h"
 #include "paddle/phi/infermeta/binary.h"

--- a/paddle/fluid/operators/fused/fusion_conv_inception_op.cc
+++ b/paddle/fluid/operators/fused/fusion_conv_inception_op.cc
@@ -16,7 +16,7 @@ limitations under the License. */
 #include <vector>
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/platform/cudnn_workspace_helper.h"
+#include "paddle/phi/backends/gpu/cuda/cudnn_workspace_helper.h"
 namespace paddle {
 namespace operators {
@@ -113,7 +113,7 @@ class ConvInceptionFusionOpMaker : public framework::OpProtoAndCheckerMaker {
                 "allocated/freed each time the operator runs, larger "
                 "workspace size can increase performance but also requires "
                 "better hardware. This size should be chosen carefully.")
-        .SetDefault(platform::GetDefaultConvWorkspaceSizeLimitMB());
+        .SetDefault(phi::backends::gpu::GetDefaultConvWorkspaceSizeLimitMB());
    AddComment(R"DOC(
 )DOC");
  }

--- a/paddle/fluid/platform/CMakeLists.txt
+++ b/paddle/fluid/platform/CMakeLists.txt
@@ -31,10 +31,6 @@ if(WITH_PYTHON)
  endif()
 endif()
-cc_library(
-  flags
-  SRCS flags.cc
-  DEPS gflags)
 cc_library(
  denormal
  SRCS denormal.cc
@@ -178,11 +174,6 @@ if(WITH_GLOO)
    DEPS framework_proto gloo_wrapper enforce)
 endif()
-cc_library(
-  cudnn_workspace_helper
-  SRCS cudnn_workspace_helper.cc
-  DEPS)
 # separate init from device_context to avoid cycle dependencies
 cc_library(
  init

--- a/paddle/fluid/platform/cpu_info.cc
+++ b/paddle/fluid/platform/cpu_info.cc
@@ -32,7 +32,7 @@ limitations under the License. */
 #include <algorithm>
-#include "paddle/fluid/platform/flags.h"
+#include "paddle/phi/core/flags.h"
 DECLARE_double(fraction_of_cpu_memory_to_use);
 DECLARE_uint64(initial_cpu_memory_in_mb);

--- a/paddle/fluid/platform/device/gpu/gpu_info.cc
+++ b/paddle/fluid/platform/device/gpu/gpu_info.cc
@@ -24,7 +24,6 @@ limitations under the License. */
 #include "paddle/fluid/memory/memory.h"
 #include "paddle/fluid/platform/cuda_device_guard.h"
 #include "paddle/fluid/platform/enforce.h"
-#include "paddle/fluid/platform/flags.h"
 #include "paddle/fluid/platform/lock_guard_ptr.h"
 #include "paddle/fluid/platform/macros.h"
 #include "paddle/fluid/platform/monitor.h"
@@ -32,6 +31,7 @@ limitations under the License. */
 #include "paddle/fluid/platform/profiler/mem_tracing.h"
 #include "paddle/fluid/string/split.h"
 #include "paddle/phi/backends/gpu/gpu_info.h"
+#include "paddle/phi/core/flags.h"
 #ifdef PADDLE_WITH_HIP
 #include "paddle/fluid/platform/dynload/miopen.h"

--- a/paddle/fluid/platform/enforce.h
+++ b/paddle/fluid/platform/enforce.h
@@ -101,7 +101,7 @@ limitations under the License. */
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
 #include "paddle/fluid/platform/device/gpu/gpu_types.h"
 #endif
-#include "paddle/fluid/platform/flags.h"
+#include "paddle/phi/core/flags.h"
 namespace phi {
 class ErrorSummary;

--- a/paddle/fluid/platform/profiler/host_tracer.cc
+++ b/paddle/fluid/platform/profiler/host_tracer.cc
@@ -17,9 +17,9 @@
 #include "glog/logging.h"
 #include "paddle/fluid/framework/op_proto_maker.h"
-#include "paddle/fluid/platform/flags.h"
 #include "paddle/fluid/platform/profiler/common_event.h"
 #include "paddle/fluid/platform/profiler/host_event_recorder.h"
+#include "paddle/phi/core/flags.h"
 // Used to filter events, works like glog VLOG(level).
 // RecordEvent will works if host_trace_level >= level.

--- a/paddle/fluid/pybind/global_value_getter_setter.cc
+++ b/paddle/fluid/pybind/global_value_getter_setter.cc
@@ -252,7 +252,7 @@ static void RegisterGlobalVarGetterSetter() {
  REGISTER_PUBLIC_GLOBAL_VAR(FLAGS_rpc_prefetch_thread_num);
 #endif
-  const auto &flag_map = platform::GetExportedFlagInfoMap();
+  const auto &flag_map = phi::GetExportedFlagInfoMap();
  for (const auto &pair : flag_map) {
    const std::string &name = pair.second.name;
    bool is_writable = pair.second.is_writable;

--- a/paddle/phi/api/yaml/generator/ops_extra_info_gen.py
+++ b/paddle/phi/api/yaml/generator/ops_extra_info_gen.py
@@ -22,7 +22,7 @@ def map_code_template(attrs_str, attrs_checker_str):
    return f"""// This file is generated by paddle/phi/api/yaml/generator/ops_extra_info_gen.py
 #include "paddle/fluid/operators/ops_extra_info.h"
-#include "paddle/fluid/platform/cudnn_workspace_helper.h"
+#include "paddle/phi/backends/gpu/cuda/cudnn_workspace_helper.h"
 namespace paddle {{
 namespace operators {{

--- a/paddle/phi/api/yaml/op_compat.yaml
+++ b/paddle/phi/api/yaml/op_compat.yaml
@@ -202,7 +202,7 @@
             str fuse_activation = "", float fuse_alpha = 0.0f, float fuse_beta = 0.0f, bool use_addto = false,
             bool fuse_residual_connection = false, float Scale_in = 1.0f, float Scale_out = 1.0f,
             float Scale_in_eltwise = 1.0f, 'float[] Scale_weights = {1.0f}', bool force_fp32_output = false,
-             int workspace_size_MB = platform::GetDefaultConvWorkspaceSizeLimitMB(), bool exhaustive_search = false]
+             int workspace_size_MB = phi::backends::gpu::GetDefaultConvWorkspaceSizeLimitMB(), bool exhaustive_search = false]
 - op : conv2d_fusion
  extra :
@@ -211,7 +211,7 @@
             str fuse_activation = "", float fuse_alpha = 0.0f, float fuse_beta = 0.0f, bool use_addto = false,
             bool fuse_residual_connection = false, float Scale_in = 1.0f, float Scale_out = 1.0f,
             float Scale_in_eltwise = 1.0f, 'float[] Scale_weights = {1.0f}', bool force_fp32_output = false,
-             int workspace_size_MB = platform::GetDefaultConvWorkspaceSizeLimitMB(), bool exhaustive_search = false]
+             int workspace_size_MB = phi::backends::gpu::GetDefaultConvWorkspaceSizeLimitMB(), bool exhaustive_search = false]
 - op : conv2d_transpose
  backward : conv2d_transpose_grad
@@ -219,7 +219,7 @@
    attrs : [bool is_test = false, bool use_cudnn = true, bool use_mkldnn = false, bool force_fp32_output = false,
             str mkldnn_data_type = "float32", bool fuse_relu = false,
             str fuse_activation = "", float fuse_alpha = 0.0f, float fuse_beta = 0.0f,
-             int workspace_size_MB = platform::GetDefaultConvWorkspaceSizeLimitMB()]
+             int workspace_size_MB = phi::backends::gpu::GetDefaultConvWorkspaceSizeLimitMB()]
 - op : conv3d
  backward : conv3d_grad
@@ -227,12 +227,12 @@
    attrs : [bool is_test = false, bool use_cudnn = true, bool use_mkldnn = false, str mkldnn_data_type = "float32", bool fuse_relu = false,
             str fuse_activation = "", float fuse_alpha = 0.0f, float fuse_beta = 0.0f,
             bool use_addto = false, bool fuse_residual_connection = false, bool force_fp32_output = false,
-             int workspace_size_MB = platform::GetDefaultConvWorkspaceSizeLimitMB(), bool exhaustive_search = false]
+             int workspace_size_MB = phi::backends::gpu::GetDefaultConvWorkspaceSizeLimitMB(), bool exhaustive_search = false]
 - op : conv3d_transpose
  backward : conv3d_transpose_grad
  extra :
-    attrs : [bool use_cudnn = true, bool use_mkldnn = false, int workspace_size_MB = platform::GetDefaultConvWorkspaceSizeLimitMB()]
+    attrs : [bool use_cudnn = true, bool use_mkldnn = false, int workspace_size_MB = phi::backends::gpu::GetDefaultConvWorkspaceSizeLimitMB()]
 - op : cos
  backward : cos_grad, cos_double_grad, cos_triple_grad
@@ -273,7 +273,7 @@
             str fuse_activation = "", float fuse_alpha = 0.0f, float fuse_beta = 0.0f, bool use_addto = false,
             bool fuse_residual_connection = false, float Scale_in = 1.0f, float Scale_out = 1.0f,
             float Scale_in_eltwise = 1.0f, 'float[] Scale_weights = {1.0f}', bool force_fp32_output = false,
-             int workspace_size_MB = platform::GetDefaultConvWorkspaceSizeLimitMB(), bool exhaustive_search = false]
+             int workspace_size_MB = phi::backends::gpu::GetDefaultConvWorkspaceSizeLimitMB(), bool exhaustive_search = false]
 - op : depthwise_conv2d_transpose
  backward : depthwise_conv2d_transpose_grad
@@ -281,7 +281,7 @@
    attrs : [bool is_test = false, bool use_cudnn = false, bool use_mkldnn = false, bool force_fp32_output = false,
             str mkldnn_data_type = "float32", bool fuse_relu = false,
             str fuse_activation = "", float fuse_alpha = 0.0f, float fuse_beta = 0.0f,
-             int workspace_size_MB = platform::GetDefaultConvWorkspaceSizeLimitMB()]
+             int workspace_size_MB = phi::backends::gpu::GetDefaultConvWorkspaceSizeLimitMB()]
 - op : dequantize_linear
  extra :

--- a/paddle/phi/backends/CMakeLists.txt
+++ b/paddle/phi/backends/CMakeLists.txt
 add_subdirectory(dynload)
+add_subdirectory(gpu)
 set(BACKENDS_SRCS all_context.cc cpu/cpu_context.cc)
 set(BACKENDS_DEPS enforce place flags eigen3 phi_device_context)

--- a/paddle/phi/backends/gpu/CMakeLists.txt
+++ b/paddle/phi/backends/gpu/CMakeLists.txt
+add_subdirectory(cuda)
--- a/paddle/phi/backends/gpu/cuda/CMakeLists.txt
+++ b/paddle/phi/backends/gpu/cuda/CMakeLists.txt
+cc_library(cudnn_workspace_helper SRCS cudnn_workspace_helper.cc)
--- a/paddle/fluid/platform/cudnn_workspace_helper.cc
+++ b/paddle/phi/backends/gpu/cuda/cudnn_workspace_helper.cc
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -12,13 +12,14 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#include "paddle/fluid/platform/cudnn_workspace_helper.h"
+#include "paddle/phi/backends/gpu/cuda/cudnn_workspace_helper.h"
 #include <cstdlib>
 #include <string>
-namespace paddle {
+namespace phi {
-namespace platform {
+namespace backends {
+namespace gpu {
 static int GetDefaultConvWorkspaceSizeLimitMBImpl() {
  const char *env_str = std::getenv("FLAGS_conv_workspace_size_limit");
@@ -30,6 +31,6 @@ int GetDefaultConvWorkspaceSizeLimitMB() {
  static auto workspace_size = GetDefaultConvWorkspaceSizeLimitMBImpl();
  return workspace_size;
 }
+}  // namespace gpu
-}  // namespace platform
+}  // namespace backends
-}  // namespace paddle
+}  // namespace phi
--- a/paddle/fluid/platform/cudnn_workspace_helper.h
+++ b/paddle/phi/backends/gpu/cuda/cudnn_workspace_helper.h
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -14,12 +14,13 @@
 #pragma once
-namespace paddle {
+namespace phi {
-namespace platform {
+namespace backends {
+namespace gpu {
 static constexpr int kDefaultConvWorkspaceSizeLimitMB = 512;
 int GetDefaultConvWorkspaceSizeLimitMB();
+}  // namespace gpu
-}  // namespace platform
+}  // namespace backends
-}  // namespace paddle
+}  // namespace phi
--- a/paddle/phi/backends/xpu/xpu_info.cc
+++ b/paddle/phi/backends/xpu/xpu_info.cc
@@ -21,7 +21,7 @@ limitations under the License. */
 // TODO(wilber): The phi computing library requires a component to manage
 // flags.
-#include "paddle/fluid/platform/flags.h"
+#include "paddle/phi/core/flags.h"
 PADDLE_DEFINE_EXPORTED_string(
    selected_xpus,

--- a/paddle/phi/core/CMakeLists.txt
+++ b/paddle/phi/core/CMakeLists.txt
@@ -5,6 +5,11 @@ if(WITH_GPU)
  proto_library(external_error_proto SRCS external_error.proto)
 endif()
+cc_library(
+  flags
+  SRCS flags.cc
+  DEPS gflags)
 cc_library(errors SRCS errors.cc)
 set(phi_enforce_deps errors flags)
 if(WITH_GPU)

--- a/paddle/fluid/platform/flags.cc
+++ b/paddle/phi/core/flags.cc
@@ -13,13 +13,12 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#include "paddle/fluid/platform/flags.h"
+#include "paddle/phi/core/flags.h"
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-#include "paddle/fluid/platform/cudnn_workspace_helper.h"
+#include "paddle/phi/backends/gpu/cuda/cudnn_workspace_helper.h"
 #endif
-namespace paddle {
+namespace phi {
-namespace platform {
 const ExportedFlagInfoMap &GetExportedFlagInfoMap() {
  return *GetMutableExportedFlagInfoMap();
@@ -30,8 +29,7 @@ ExportedFlagInfoMap *GetMutableExportedFlagInfoMap() {
  return &g_exported_flag_info_map;
 }
-}  // namespace platform
+}  // namespace phi
-}  // namespace paddle
 PADDLE_DEFINE_EXPORTED_int32(inner_op_parallelism,
                             0,
@@ -261,9 +259,10 @@ PADDLE_DEFINE_EXPORTED_bool(
 * increased.
 *       Users need to balance memory and speed.
 */
-PADDLE_DEFINE_EXPORTED_int64(conv_workspace_size_limit,
+PADDLE_DEFINE_EXPORTED_int64(
-                             paddle::platform::kDefaultConvWorkspaceSizeLimitMB,
+    conv_workspace_size_limit,
-                             "cuDNN convolution workspace limit in MB unit.");
+    phi::backends::gpu::kDefaultConvWorkspaceSizeLimitMB,
+    "cuDNN convolution workspace limit in MB unit.");
 /**
 * CUDNN related FLAG

--- a/paddle/fluid/platform/flags.h
+++ b/paddle/phi/core/flags.h
@@ -20,12 +20,11 @@
 #include <type_traits>
 #include "gflags/gflags.h"
-#include "paddle/fluid/platform/macros.h"
+#include "paddle/phi/core/macros.h"
 #include "paddle/utils/variant.h"
-namespace paddle {
+namespace phi {
-namespace platform {
 struct FlagInfo {
  using ValueType =
@@ -51,7 +50,7 @@ ExportedFlagInfoMap *GetMutableExportedFlagInfoMap();
      static_assert(std::is_same<FlagDeclaredType, ::std::string>::value ||   \
                        std::is_arithmetic<FlagDeclaredType>::value,          \
                    "FLAGS should be std::string or arithmetic type");        \
-      auto *instance = ::paddle::platform::GetMutableExportedFlagInfoMap();   \
+      auto *instance = ::phi::GetMutableExportedFlagInfoMap();                \
      auto &info = (*instance)[#__name];                                      \
      info.name = #__name;                                                    \
      info.value_ptr = &(FLAGS_##__name);                                     \
@@ -96,5 +95,4 @@ ExportedFlagInfoMap *GetMutableExportedFlagInfoMap();
  __PADDLE_DEFINE_EXPORTED_FLAG(                                \
      name, true, ::std::string, string, default_value, doc)
-}  // namespace platform
+}  // namespace phi
-}  // namespace paddle
--- a/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu
@@ -14,11 +14,11 @@
 #include "paddle/fluid/operators/layout_utils.h"
 #include "paddle/fluid/operators/norm_utils.cu.h"
-#include "paddle/fluid/platform/flags.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/backends/gpu/gpu_dnn.h"
 #include "paddle/phi/common/layout.h"
 #include "paddle/phi/core/enforce.h"
+#include "paddle/phi/core/flags.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/batch_norm_kernel.h"
 #include "paddle/phi/kernels/empty_kernel.h"

--- a/paddle/phi/kernels/gpu/batch_norm_kernel.cu
+++ b/paddle/phi/kernels/gpu/batch_norm_kernel.cu
@@ -22,11 +22,11 @@ namespace cub = hipcub;
 #include "paddle/fluid/operators/layout_utils.h"
 #include "paddle/fluid/operators/norm_utils.cu.h"
-#include "paddle/fluid/platform/flags.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/backends/gpu/gpu_dnn.h"
 #include "paddle/phi/common/layout.h"
 #include "paddle/phi/core/enforce.h"
+#include "paddle/phi/core/flags.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/batch_norm_kernel.h"
 #include "paddle/phi/kernels/funcs/batch_norm_utils.h"

--- a/paddle/phi/kernels/gpu/gelu_funcs.h
+++ b/paddle/phi/kernels/gpu/gelu_funcs.h
@@ -14,10 +14,10 @@
 #pragma once
-#include "paddle/fluid/platform/flags.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/common/amp_type_traits.h"
 #include "paddle/phi/common/place.h"
+#include "paddle/phi/core/flags.h"
 #include "paddle/phi/kernels/funcs/aligned_vector.h"
 DECLARE_bool(use_fast_math);

--- a/paddle/phi/kernels/gpudnn/conv_grad_kernel.cu
+++ b/paddle/phi/kernels/gpudnn/conv_grad_kernel.cu
@@ -23,8 +23,8 @@
 #include "paddle/phi/kernels/gpudnn/conv_cudnn_v7.h"
 #endif
-#include "paddle/fluid/platform/cudnn_workspace_helper.h"
 #include "paddle/fluid/platform/profiler.h"
+#include "paddle/phi/backends/gpu/cuda/cudnn_workspace_helper.h"
 #include "paddle/phi/common/bfloat16.h"
 #include "paddle/phi/common/float16.h"
 #include "paddle/phi/kernels/cpu/conv_util.h"

--- a/paddle/phi/kernels/gpudnn/conv_kernel.cu
+++ b/paddle/phi/kernels/gpudnn/conv_kernel.cu
@@ -24,8 +24,8 @@
 #include "paddle/phi/kernels/gpudnn/conv_cudnn_v7.h"
 #endif
-#include "paddle/fluid/platform/cudnn_workspace_helper.h"
 #include "paddle/fluid/platform/profiler.h"
+#include "paddle/phi/backends/gpu/cuda/cudnn_workspace_helper.h"
 #include "paddle/phi/common/bfloat16.h"
 #include "paddle/phi/common/float16.h"
 #include "paddle/phi/kernels/cpu/conv_util.h"

--- a/paddle/phi/kernels/impl/conv_cudnn_impl.h
+++ b/paddle/phi/kernels/impl/conv_cudnn_impl.h
@@ -23,9 +23,9 @@
 #include "paddle/phi/kernels/gpudnn/conv_cudnn_v7.h"
 #endif
-#include "paddle/fluid/platform/cudnn_workspace_helper.h"
 #include "paddle/fluid/platform/profiler.h"
 #include "paddle/phi/backends/dynload/cudnn.h"
+#include "paddle/phi/backends/gpu/cuda/cudnn_workspace_helper.h"
 #include "paddle/phi/common/float16.h"
 #include "paddle/phi/kernels/cpu/conv_util.h"
 #include "paddle/phi/kernels/funcs/batch_norm_utils.h"

--- a/paddle/testing/paddle_gtest_main.cc
+++ b/paddle/testing/paddle_gtest_main.cc
@@ -17,8 +17,8 @@ limitations under the License. */
 #include "paddle/fluid/framework/phi_utils.h"
 #include "paddle/fluid/memory/allocation/allocator_strategy.h"
 #include "paddle/fluid/platform/device/npu/npu_info.h"
-#include "paddle/fluid/platform/flags.h"
 #include "paddle/fluid/platform/init.h"
+#include "paddle/phi/core/flags.h"
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
 DECLARE_bool(enable_gpu_memory_usage_log);
@@ -43,7 +43,7 @@ int main(int argc, char** argv) {
  }
 #endif
-  const auto& flag_map = paddle::platform::GetExportedFlagInfoMap();
+  const auto& flag_map = phi::GetExportedFlagInfoMap();
  for (const auto& pair : flag_map) {
    const std::string& name = pair.second.name;
    // NOTE(zhiqiu): some names may not linked in some tests, so add to