From 2ef4ec71117f77bfe2021fbbf624e617098f3abd Mon Sep 17 00:00:00 2001
From: huangjiyi <43315610+huangjiyi@users.noreply.github.com>
Date: Wed, 30 Aug 2023 16:58:24 +0800
Subject: [PATCH] Add paddle custom flags support (#56256)

* update
* replace gflags header
* replace DEFINE_ with PD_DEFINE_
* fix bug
* fix bug
* fix bug
* update cmake
* add :: before some paddle namespace
* fix link error
* fix CI-Py3
* allow commandline parse
* fix SetFlagsFromEnv
* fix bug
* fix bug
* fix CI-CINN
* fix CI-Coverage-build
* fix CI-Windows-build
* fix CI-Inference
* fix bug
* fix bug
* fix CI-CINN
* fix inference api test
* fix infer_ut test
* revert infer_ut gflags usage
* update
* fix inference
* remove flags export macro
* revert inference demo_ci gflags usage
* update
* update
* update
* update
* update
* update
* update
* update
* fix bug when turn on WITH_GFLAGS
* turn on WITH_GFLAGS
* fix bug when turn on WITH_GFLAGS
* fix bug when turn on WITH_GFLAGS
* update
* update and add unittest
* add unittest
* fix conflict
* rerun ci
* update
* resolve conflict
---
 CMakeLists.txt | 1 +
 cmake/cinn.cmake | 10 +
 cmake/configure.cmake | 4 +
 cmake/external/brpc.cmake | 13 +
 cmake/external/gflags.cmake | 11 +
 cmake/inference_lib.cmake | 11 +
 .../collective/process_group_custom.cc | 2 +-
 .../collective/process_group_nccl.cc | 2 +-
 .../fluid/distributed/collective/reducer.cc | 2 +-
 .../distributed/fleet_executor/CMakeLists.txt | 11 +-
 .../distributed/fleet_executor/carrier.cc | 2 +-
 .../distributed/ps/service/CMakeLists.txt | 27 +-
 .../distributed/ps/service/brpc_ps_client.cc | 79 +--
 .../distributed/ps/service/brpc_ps_server.cc | 24 +-
 .../ps/service/communicator/communicator.cc | 26 +-
 .../ps/service/communicator/communicator.h | 8 +-
 .../ps/service/coordinator_client.cc | 12 +-
 .../ps/service/coordinator_client.h | 15 +-
 paddle/fluid/distributed/ps/service/env.h | 2 +-
 .../distributed/ps/service/heter_client.cc | 14 +-
 .../distributed/ps/service/heter_client.h | 4 +-
 .../distributed/ps/service/heter_server.cc | 8 +-
 .../distributed/ps/service/heter_server.h | 18 +-
 .../fluid/distributed/ps/service/ps_client.cc | 5 +-
 .../ps/service/ps_service/graph_py_service.h | 10 +-
 .../ps/service/ps_service/service.cc | 21 +-
 .../ps/service/ps_service/service.h | 18 +-
 paddle/fluid/distributed/ps/service/server.h | 6 +-
 .../ps/service/simple_rpc/baidu_rpc_server.cc | 4 +-
 .../ps/table/common_graph_table.cc | 126 ++---
 .../distributed/ps/table/common_graph_table.h | 10 +-
 .../distributed/ps/table/ctr_accessor.cc | 3 +-
 .../ps/table/ctr_double_accessor.cc | 3 +-
 .../distributed/ps/table/ctr_dymf_accessor.cc | 3 +-
 .../distributed/ps/table/depends/dense.h | 2 +-
 .../ps/table/depends/feature_value.h | 5 +-
 .../ps/table/depends/initializers.h | 6 +-
 .../distributed/ps/table/graph/class_macro.h | 2 +-
 .../ps/table/memory_sparse_table.cc | 97 ++--
 .../distributed/ps/table/sparse_accessor.cc | 5 +-
 .../distributed/ps/table/sparse_sgd_rule.cc | 6 +-
 .../distributed/ps/table/ssd_sparse_table.cc | 214 ++++----
 .../distributed/ps/table/ssd_sparse_table.h | 2 +-
 paddle/fluid/distributed/ps/table/table.h | 6 +-
 paddle/fluid/distributed/ps/wrapper/fleet.cc | 40 +-
 paddle/fluid/distributed/ps/wrapper/fleet.h | 10 +-
 paddle/fluid/distributed/rpc/CMakeLists.txt | 12 +-
 paddle/fluid/framework/CMakeLists.txt | 6 +-
 paddle/fluid/framework/async_executor.cc | 2 +-
 .../fluid/framework/copy_same_tensor_test.cc | 4 +-
 .../fluid/framework/details/bkcl_op_handle.h | 2 +-
 .../fluid/framework/details/build_strategy.cc | 4 +-
 .../framework/details/build_strategy_test.cc | 2 +-
 .../details/fused_all_reduce_op_handle.cc | 2 +-
 paddle/fluid/framework/executor.cc | 2 +-
 .../fluid/framework/executor_thread_worker.cc | 2 +-
 paddle/fluid/framework/fleet/CMakeLists.txt | 2 +-
 paddle/fluid/framework/garbage_collector.cc | 2 +-
 paddle/fluid/framework/garbage_collector.h | 2 +-
 paddle/fluid/framework/ir/graph.h | 5 +-
 paddle/fluid/framework/ir/graph_helper.cc | 2 +-
 .../framework/ir/graph_to_program_pass.cc | 3 +-
 .../new_executor/executor_statistics.cc | 2 +-
 .../interpreter/dependency_builder.h | 2 +-
 .../interpreter/execution_config.cc | 2 +-
 .../new_executor/interpreter_base_impl.h | 14 +-
 .../framework/new_executor/interpretercore.h | 2 +-
 .../new_executor/new_ir_interpreter.cc | 2 +-
 paddle/fluid/framework/op_kernel_type.h | 4 +-
 paddle/fluid/framework/operator.cc | 6 +-
 paddle/fluid/framework/operator_test.cc | 2 +-
 .../framework/paddle2cinn/build_cinn_pass.cc | 6 +-
 .../framework/paddle2cinn/cinn_compiler.cc | 2 +-
 .../paddle2cinn/cinn_compiler_test.cc | 2 +-
 paddle/fluid/framework/scope.cc | 2 +-
 paddle/fluid/framework/unused_var_check.h | 2 +-
 paddle/fluid/imperative/prepared_operator.cc | 2 +-
 paddle/fluid/imperative/tracer.cc | 2 +-
 paddle/fluid/imperative/tracer.h | 2 +-
 paddle/fluid/inference/CMakeLists.txt | 4 +
 paddle/fluid/inference/analysis/analyzer.h | 2 +-
 .../ir_params_sync_among_devices_pass.cc | 2 +-
 paddle/fluid/inference/analysis/ut_helper.h | 4 +-
 .../fluid/inference/api/analysis_predictor.cc | 10 +-
 paddle/fluid/inference/api/api.cc | 12 +-
 paddle/fluid/inference/api/api_impl.cc | 3 +-
 .../api/demo_ci/onnxruntime_mobilenet_demo.cc | 2 +-
 .../api/demo_ci/simple_on_word2vec.cc | 2 +-
 .../api/demo_ci/trt_mobilenet_demo.cc | 2 +-
 .../fluid/inference/api/demo_ci/vis_demo.cc | 5 +-
 .../api/demo_ci/windows_mobilenet.cc | 2 +-
 .../api/onnxruntime_predictor_tester.cc | 2 +-
 paddle/fluid/inference/api/paddle_api.h | 2 +-
 .../inference/api/paddle_inference_api.h | 2 +-
 paddle/fluid/inference/io.cc | 12 +-
 paddle/fluid/inference/paddle_inference.map | 1 +
 .../inference/tensorrt/plugin/trt_plugin.h | 2 +-
 .../allocator_facade_abs_flags_test.cc | 2 +-
 .../allocator_facade_frac_flags_test.cc | 2 +-
 ...o_growth_best_fit_allocator_facade_test.cc | 2 +-
 .../auto_growth_best_fit_allocator_test.cc | 4 +-
 .../allocation/naive_best_fit_allocator.cc | 2 +-
 paddle/fluid/operators/pscore/CMakeLists.txt | 8 +-
 paddle/fluid/operators/tdm_child_op.h | 2 +-
 paddle/fluid/operators/tdm_sampler_op.h | 2 +-
 paddle/fluid/platform/cpu_info_test.cc | 2 +-
 .../platform/cuda_graph_with_memory_pool.cc | 2 +-
 paddle/fluid/platform/device/gpu/gpu_info.cc | 2 +-
 paddle/fluid/platform/device/xpu/xpu_info.cc | 2 +-
 .../fluid/platform/dynload/dynamic_loader.cc | 2 +-
 paddle/fluid/platform/enforce.h | 2 +-
 paddle/fluid/platform/flags.h | 2 +-
 paddle/fluid/platform/init.cc | 4 +-
 paddle/fluid/platform/init.h | 2 +-
 paddle/fluid/platform/profiler.cc | 4 +-
 paddle/fluid/platform/profiler/host_tracer.cc | 8 -
 paddle/fluid/platform/profiler/profiler.cc | 8 +
 paddle/fluid/platform/profiler/profiler.h | 2 +-
 paddle/fluid/pybind/CMakeLists.txt | 13 +-
 paddle/fluid/pybind/eager_method.cc | 2 +-
 .../pybind/global_value_getter_setter.cc | 6 +-
 paddle/fluid/pybind/reader_py.cc | 2 +-
 paddle/fluid/string/pretty_log.h | 2 +-
 paddle/phi/CMakeLists.txt | 2 +-
 paddle/phi/api/lib/api_gen_utils.cc | 4 +-
 paddle/phi/api/lib/data_transform.cc | 4 +-
 paddle/phi/api/profiler/device_tracer.cc | 4 +-
 paddle/phi/api/yaml/generator/api_gen.py | 6 +-
 .../api/yaml/generator/backward_api_gen.py | 6 +-
 .../phi/api/yaml/generator/dist_bw_api_gen.py | 6 +-
 .../yaml/generator/intermediate_api_gen.py | 4 +-
 .../phi/api/yaml/generator/sparse_api_gen.py | 4 +-
 .../api/yaml/generator/sparse_bw_api_gen.py | 4 +-
 .../phi/api/yaml/generator/strings_api_gen.py | 4 +-
 .../api/yaml/generator/tensor_operants_gen.py | 4 +-
 paddle/phi/backends/cpu/cpu_info.cc | 6 +-
 paddle/phi/backends/device_base.cc | 8 +-
 paddle/phi/backends/dynload/cudnn_frontend.h | 4 +-
 paddle/phi/backends/dynload/dynamic_loader.cc | 2 +-
 paddle/phi/backends/gpu/cuda/cudnn_helper.h | 4 +-
 paddle/phi/backends/gpu/gpu_info.cc | 4 +-
 paddle/phi/backends/gpu/rocm/miopen_helper.h | 4 +-
 paddle/phi/core/enforce.cc | 4 +-
 paddle/phi/core/flags.cc | 28 +-
 paddle/phi/core/flags.h | 21 +-
 paddle/phi/core/kernel_factory.cc | 8 +-
 paddle/phi/core/threadpool.cc | 10 +-
 paddle/phi/infermeta/unary.cc | 2 +-
 .../kernels/autotune/cache_cudnn_frontend.h | 2 +-
 .../phi/kernels/autotune/switch_autotune.cc | 4 +-
 paddle/phi/kernels/cpu/adam_kernel.cc | 2 +-
 paddle/phi/kernels/funcs/blas/blas_impl.cu.h | 2 +-
 paddle/phi/kernels/funcs/blas/blas_impl.hip.h | 4 +-
 .../phi/kernels/funcs/fused_gemm_epilogue.h | 4 +-
 paddle/phi/kernels/funcs/jit/benchmark.cc | 12 +-
 paddle/phi/kernels/funcs/jit/gen_base.h | 2 +-
 paddle/phi/kernels/funcs/jit/test.cc | 4 +-
 .../phi/kernels/gpu/batch_norm_grad_kernel.cu | 2 +-
 paddle/phi/kernels/gpu/batch_norm_kernel.cu | 2 +-
 .../kernels/gpu/c_embedding_grad_kernel.cu | 4 +-
 .../phi/kernels/gpu/embedding_grad_kernel.cu | 4 +-
 .../phi/kernels/gpu/flash_attn_grad_kernel.cu | 2 +-
 paddle/phi/kernels/gpu/flash_attn_kernel.cu | 2 +-
 paddle/phi/kernels/gpu/gelu_grad_kernel.cu | 2 +-
 paddle/phi/kernels/gpu/gelu_kernel.cu | 2 +-
 paddle/phi/kernels/gpu/index_add_kernel.cu | 4 +-
 .../kernels/gpu/index_select_grad_kernel.cu | 4 +-
 paddle/phi/kernels/gpu/layer_norm_kernel.cu | 4 +-
 paddle/phi/kernels/gpu/randperm_kernel.cu | 2 +-
 .../phi/kernels/gpu/uniform_inplace_kernel.cu | 2 +-
 paddle/phi/kernels/gpu/uniform_kernel.cu | 2 +-
 paddle/phi/kernels/gpudnn/conv_gpudnn_info.h | 6 +-
 paddle/phi/kernels/impl/conv_cudnn_impl.h | 6 +-
 paddle/phi/kernels/impl/einsum_impl.h | 4 +-
 .../phi/kernels/legacy/gpu/uniform_kernel.cu | 2 +-
 .../kernels/selected_rows/cpu/adam_kernel.cc | 4 +-
 paddle/testing/paddle_gtest_main.cc | 38 +-
 paddle/utils/CMakeLists.txt | 8 +
 paddle/utils/flags.h | 77 +++
 paddle/utils/flags_native.cc | 484 ++++++++++++++++++
 paddle/utils/flags_native.h | 131 +++++
 paddle/utils/flags_native_test.cc | 104 ++++
 paddle/utils/string/pretty_log.cc | 4 +-
 paddle/utils/string/pretty_log.h | 4 +-
 test/cpp/fluid/benchmark/op_tester.cc | 14 +-
 .../mkldnn/test_mkldnn_cpu_quantize_pass.cc | 2 +-
 test/cpp/fluid/pscore/CMakeLists.txt | 12 +-
 test/cpp/fluid/pscore/switch_server_test.cc | 10 +-
 .../api/analysis_predictor_tester.cc | 2 +-
 ...er_bfloat16_image_classification_tester.cc | 2 +-
 test/cpp/inference/api/analyzer_dam_tester.cc | 8 +-
 ...nalyzer_detect_functional_mkldnn_tester.cc | 4 +-
 .../inference/api/analyzer_detect_tester.cc | 4 +-
 .../analyzer_image_classification_tester.cc | 2 +-
 ...alyzer_int8_image_classification_tester.cc | 4 +-
 .../analyzer_int8_object_detection_tester.cc | 10 +-
 test/cpp/inference/api/analyzer_lac_tester.cc | 14 +-
 test/cpp/inference/api/analyzer_mmp_tester.cc | 16 +-
 ...lyzer_quant_image_classification_tester.cc | 2 +-
 .../cpp/inference/api/analyzer_rnn1_tester.cc | 2 +-
 .../api/analyzer_seq_pool1_tester_helper.h | 16 +-
test/cpp/inference/api/analyzer_vis_tester.cc | 4 +- test/cpp/inference/api/api_impl_tester.cc | 62 +-- .../inference/api/ipu_multi_model_profile.cc | 2 +- .../inference/api/ipu_resnet50_fp16_test.cc | 2 +- test/cpp/inference/api/ipu_resnet50_test.cc | 2 +- test/cpp/inference/api/ipu_word2vec_sample.cc | 6 +- test/cpp/inference/api/lite_mul_model_test.cc | 2 +- test/cpp/inference/api/lite_resnet50_test.cc | 2 +- .../inference/api/mkldnn_quantizer_tester.cc | 2 +- .../paddle_infer_api_copy_tensor_tester.cc | 2 +- .../api/paddle_infer_api_errors_tester.cc | 2 +- .../inference/api/paddle_infer_api_test.cc | 2 +- test/cpp/inference/api/tester_helper.h | 138 ++--- .../inference/api/trt_cascade_rcnn_test.cc | 2 +- ...e_ernie_fp16_serialize_deserialize_test.cc | 2 +- ..._shape_ernie_serialize_deserialize_test.cc | 2 +- ...c_shape_ernie_serialize_deserialize_test.h | 2 +- .../api/trt_dynamic_shape_ernie_test.cc | 2 +- .../inference/api/trt_dynamic_shape_test.cc | 2 +- ...rt_dynamic_shape_transformer_prune_test.cc | 2 +- test/cpp/inference/api/trt_fc_prelu_test.cc | 2 +- .../api/trt_instance_norm_converter_test.cc | 2 +- .../api/trt_mark_trt_engine_outputs_test.cc | 1 - test/cpp/inference/api/trt_mobilenet_test.cc | 2 +- test/cpp/inference/api/trt_quant_int8_test.cc | 2 +- .../api/trt_quant_int8_yolov3_r50_test.cc | 2 +- .../inference/api/trt_rebind_stream_test.cc | 2 +- test/cpp/inference/api/trt_resnet50_test.cc | 2 +- test/cpp/inference/api/trt_resnext_test.cc | 2 +- .../inference/api/trt_split_converter_test.cc | 2 +- test/cpp/inference/api/trt_test_helper.h | 8 +- .../inference/api/xpu_config_resnet50_test.cc | 2 +- .../api/xpu_runtime_config_resnet50_test.cc | 2 +- test/cpp/inference/infer_ut/test_LeViT.cc | 2 +- .../cpp/inference/infer_ut/test_det_mv3_db.cc | 2 +- .../inference/infer_ut/test_ernie_text_cls.cc | 2 +- .../infer_ut/test_ernie_xnli_int8.cc | 2 +- .../cpp/inference/infer_ut/test_mobilnetv1.cc | 2 +- .../inference/infer_ut/test_ppyolo_mbv3.cc | 2 +- .../inference/infer_ut/test_ppyolov2_r50vd.cc | 2 +- test/cpp/inference/infer_ut/test_resnet50.cc | 2 +- .../inference/infer_ut/test_resnet50_quant.cc | 2 +- test/cpp/inference/infer_ut/test_yolov3.cc | 2 +- test/cpp/phi/api/scale_api.h | 2 +- test/cpp/prim/test_static_prim.cc | 2 +- 246 files changed, 1750 insertions(+), 956 deletions(-) create mode 100644 paddle/utils/flags.h create mode 100644 paddle/utils/flags_native.cc create mode 100644 paddle/utils/flags_native.h create mode 100644 paddle/utils/flags_native_test.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index 34e748c794f..318c9df4893 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -254,6 +254,7 @@ option(WITH_BRPC_RDMA "Use brpc rdma as the rpc protocal" OFF) option(ON_INFER "Turn on inference optimization and inference-lib generation" ON) option(WITH_CPP_DIST "Install PaddlePaddle C++ distribution" OFF) +option(WITH_GFLAGS "Compile PaddlePaddle with gflags support" OFF) ################################ Internal Configurations ####################################### option(WITH_NV_JETSON "Compile PaddlePaddle with NV JETSON" OFF) option(WITH_PROFILER "Compile PaddlePaddle with GPU profiler and gperftools" diff --git a/cmake/cinn.cmake b/cmake/cinn.cmake index de13f71526c..ca25a7d5d30 100644 --- a/cmake/cinn.cmake +++ b/cmake/cinn.cmake @@ -183,6 +183,11 @@ if(WITH_MKL) endif() endif() +if(NOT WITH_GFLAGS) + target_link_libraries(cinnapi gflags) + add_dependencies(cinnapi gflags) +endif() + if(WITH_GPU) target_link_libraries( cinnapi @@ -237,6 +242,11 @@ 
function(gen_cinncore LINKTYPE) endif() endif() + if(NOT WITH_GFLAGS) + target_link_libraries(${CINNCORE_TARGET} gflags) + add_dependencies(${CINNCORE_TARGET} gflags) + endif() + if(WITH_GPU) target_link_libraries( ${CINNCORE_TARGET} diff --git a/cmake/configure.cmake b/cmake/configure.cmake index dc661fce388..4d0b04209c0 100644 --- a/cmake/configure.cmake +++ b/cmake/configure.cmake @@ -201,6 +201,10 @@ if(WITH_DISTRIBUTE) add_definitions(-DPADDLE_WITH_DISTRIBUTE) endif() +if(WITH_GFLAGS) + add_definitions(-DPADDLE_WITH_GFLAGS) +endif() + if(WITH_PSCORE) add_definitions(-DPADDLE_WITH_PSCORE) endif() diff --git a/cmake/external/brpc.cmake b/cmake/external/brpc.cmake index ff33e142add..3c9f2b69620 100755 --- a/cmake/external/brpc.cmake +++ b/cmake/external/brpc.cmake @@ -91,3 +91,16 @@ add_dependencies(brpc extern_brpc) add_definitions(-DBRPC_WITH_GLOG) list(APPEND external_project_dependencies brpc) + +set(EXTERNAL_BRPC_DEPS + brpc + protobuf + ssl + crypto + leveldb + glog + snappy) + +if(NOT WITH_GFLAGS) + set(EXTERNAL_BRPC_DEPS ${EXTERNAL_BRPC_DEPS} gflags) +endif() diff --git a/cmake/external/gflags.cmake b/cmake/external/gflags.cmake index 06827732e59..75436783c7e 100755 --- a/cmake/external/gflags.cmake +++ b/cmake/external/gflags.cmake @@ -102,3 +102,14 @@ if(WIN32) set_property(GLOBAL PROPERTY OS_DEPENDENCY_MODULES shlwapi.lib) endif() endif() + +# We have implemented a custom flags tool paddle_flags to replace gflags. +# User can also choose to use gflags by setting WITH_GFLAGS=ON. But when +# using paddle_flags, gflags is also needed for other third party libraries +# including glog and brpc. So we can not remove gflags completely. +set(flags_dep) +if(WITH_GFLAGS) + list(APPEND flags_dep gflags) +else() + list(APPEND flags_dep paddle_flags) +endif() diff --git a/cmake/inference_lib.cmake b/cmake/inference_lib.cmake index c97a68a0175..13fce961365 100755 --- a/cmake/inference_lib.cmake +++ b/cmake/inference_lib.cmake @@ -336,11 +336,22 @@ copy( inference_lib_dist SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/flat_hash_map.h DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/utils/) +copy( + inference_lib_dist + SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/flags.h + DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/utils/) copy( inference_lib_dist SRCS ${PADDLE_SOURCE_DIR}/paddle/extension.h DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/) +if(NOT WITH_GFLAGS) + copy( + inference_lib_dist + SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/flags_native.h + DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/utils/) +endif() + # the include path of phi needs to be changed to adapt to inference api path add_custom_command( TARGET inference_lib_dist diff --git a/paddle/fluid/distributed/collective/process_group_custom.cc b/paddle/fluid/distributed/collective/process_group_custom.cc index 6d5c30da313..a905fcc0955 100644 --- a/paddle/fluid/distributed/collective/process_group_custom.cc +++ b/paddle/fluid/distributed/collective/process_group_custom.cc @@ -27,7 +27,7 @@ constexpr int64_t kWaitBlockTImeout = 10; -DECLARE_bool(use_stream_safe_cuda_allocator); +PD_DECLARE_bool(use_stream_safe_cuda_allocator); namespace paddle { namespace distributed { diff --git a/paddle/fluid/distributed/collective/process_group_nccl.cc b/paddle/fluid/distributed/collective/process_group_nccl.cc index a8539278d2f..7ffe00b8cd8 100644 --- a/paddle/fluid/distributed/collective/process_group_nccl.cc +++ b/paddle/fluid/distributed/collective/process_group_nccl.cc @@ -29,7 +29,7 @@ #include 
"paddle/phi/core/distributed/comm_context_manager.h" PHI_DECLARE_bool(nccl_blocking_wait); -DECLARE_bool(use_stream_safe_cuda_allocator); +PD_DECLARE_bool(use_stream_safe_cuda_allocator); // set this flag to `true` and recompile to enable dynamic checks constexpr bool FLAGS_enable_nccl_dynamic_check = false; diff --git a/paddle/fluid/distributed/collective/reducer.cc b/paddle/fluid/distributed/collective/reducer.cc index c3262b8db8d..378668499c9 100644 --- a/paddle/fluid/distributed/collective/reducer.cc +++ b/paddle/fluid/distributed/collective/reducer.cc @@ -18,7 +18,7 @@ #include "paddle/phi/backends/device_manager.h" #include "paddle/phi/core/flags.h" -DECLARE_bool(use_stream_safe_cuda_allocator); +PD_DECLARE_bool(use_stream_safe_cuda_allocator); PHI_DECLARE_string(allocator_strategy); namespace paddle { diff --git a/paddle/fluid/distributed/fleet_executor/CMakeLists.txt b/paddle/fluid/distributed/fleet_executor/CMakeLists.txt index eea257b5ddf..9c282055201 100755 --- a/paddle/fluid/distributed/fleet_executor/CMakeLists.txt +++ b/paddle/fluid/distributed/fleet_executor/CMakeLists.txt @@ -7,16 +7,7 @@ proto_library(interceptor_message_proto SRCS interceptor_message.proto) if(WITH_ARM_BRPC) set(BRPC_DEPS arm_brpc snappy phi glog) elseif(WITH_DISTRIBUTE AND NOT WITH_PSLIB) - set(BRPC_DEPS - brpc - ssl - crypto - protobuf - zlib - leveldb - snappy - phi - glog) + set(BRPC_DEPS ${EXTERNAL_BRPC_DEPS} zlib phi) else() set(BRPC_DEPS "") endif() diff --git a/paddle/fluid/distributed/fleet_executor/carrier.cc b/paddle/fluid/distributed/fleet_executor/carrier.cc index 1dc29493af9..bb128c1287c 100644 --- a/paddle/fluid/distributed/fleet_executor/carrier.cc +++ b/paddle/fluid/distributed/fleet_executor/carrier.cc @@ -17,7 +17,6 @@ #include #include -#include "gflags/gflags.h" #include "paddle/fluid/distributed/fleet_executor/global.h" #include "paddle/fluid/distributed/fleet_executor/interceptor.h" #include "paddle/fluid/distributed/fleet_executor/message_bus.h" @@ -29,6 +28,7 @@ #include "paddle/fluid/framework/variable.h" #include "paddle/fluid/framework/variable_helper.h" #include "paddle/fluid/platform/flags.h" +#include "paddle/utils/flags.h" PADDLE_DEFINE_EXPORTED_bool( fleet_executor_with_standalone, false, diff --git a/paddle/fluid/distributed/ps/service/CMakeLists.txt b/paddle/fluid/distributed/ps/service/CMakeLists.txt index 585dd111bf7..c23f26c6352 100755 --- a/paddle/fluid/distributed/ps/service/CMakeLists.txt +++ b/paddle/fluid/distributed/ps/service/CMakeLists.txt @@ -3,34 +3,11 @@ set_source_files_properties(${BRPC_SRCS}) if(WITH_HETERPS) - set(BRPC_DEPS - brpc - ssl - crypto - protobuf - phi - glog - zlib - leveldb - snappy - glog - device_context - rocksdb) + set(BRPC_DEPS ${EXTERNAL_BRPC_DEPS} phi zlib device_context rocksdb) else() - set(BRPC_DEPS - brpc - ssl - crypto - protobuf - phi - glog - zlib - leveldb - snappy - glog - device_context) + set(BRPC_DEPS ${EXTERNAL_BRPC_DEPS} phi zlib device_context) endif() diff --git a/paddle/fluid/distributed/ps/service/brpc_ps_client.cc b/paddle/fluid/distributed/ps/service/brpc_ps_client.cc index 93fe8c849be..9ad8768e092 100644 --- a/paddle/fluid/distributed/ps/service/brpc_ps_client.cc +++ b/paddle/fluid/distributed/ps/service/brpc_ps_client.cc @@ -34,49 +34,53 @@ class Variable; namespace paddle { namespace distributed { -DEFINE_int32(pserver_push_dense_merge_limit, - 12, - "limit max push_dense local merge requests"); +PD_DEFINE_int32(pserver_push_dense_merge_limit, + 12, + "limit max push_dense local merge requests"); 
-DEFINE_int32(pserver_push_sparse_merge_limit, - 12, - "limit max push_sparse local merge requests"); +PD_DEFINE_int32(pserver_push_sparse_merge_limit, + 12, + "limit max push_sparse local merge requests"); -DEFINE_int32(pserver_pull_dense_limit, - 12, - "limit max push_sparse local merge requests"); +PD_DEFINE_int32(pserver_pull_dense_limit, + 12, + "limit max push_sparse local merge requests"); -DEFINE_int32(pserver_async_push_dense_interval_ms, - 10, - "async push_dense to server interval"); +PD_DEFINE_int32(pserver_async_push_dense_interval_ms, + 10, + "async push_dense to server interval"); -DEFINE_int32(pserver_async_push_sparse_interval_ms, - 10, - "async push_sparse to server interval"); +PD_DEFINE_int32(pserver_async_push_sparse_interval_ms, + 10, + "async push_sparse to server interval"); -DEFINE_bool(pserver_scale_gradient_by_merge, - false, - "scale dense gradient when merged"); +PD_DEFINE_bool(pserver_scale_gradient_by_merge, + false, + "scale dense gradient when merged"); -DEFINE_int32(pserver_communicate_compress_type, - 0, - "none:0 snappy:1 gzip:2 zlib:3 lz4:4"); +PD_DEFINE_int32(pserver_communicate_compress_type, + 0, + "none:0 snappy:1 gzip:2 zlib:3 lz4:4"); -DEFINE_int32(pserver_max_async_call_num, - 13, - "max task num in async_call_server"); +PD_DEFINE_int32(pserver_max_async_call_num, + 13, + "max task num in async_call_server"); -DEFINE_int32(pserver_timeout_ms, 500000, "pserver request server timeout_ms"); +PD_DEFINE_int32(pserver_timeout_ms, + 500000, + "pserver request server timeout_ms"); -DEFINE_int32(pserver_connect_timeout_ms, - 10000, - "pserver connect server timeout_ms"); +PD_DEFINE_int32(pserver_connect_timeout_ms, + 10000, + "pserver connect server timeout_ms"); -DEFINE_int32(pserver_sparse_merge_thread, 1, "pserver sparse merge thread num"); +PD_DEFINE_int32(pserver_sparse_merge_thread, + 1, + "pserver sparse merge thread num"); -DEFINE_int32(pserver_sparse_table_shard_num, - 1000, - "sparse table shard for save & load"); +PD_DEFINE_int32(pserver_sparse_table_shard_num, + 1000, + "sparse table shard for save & load"); inline size_t get_sparse_shard(uint32_t shard_num, uint32_t server_num, @@ -140,7 +144,7 @@ int32_t BrpcPsClient::StartFlClientService(const std::string &self_endpoint) { if (_fl_server.Start(self_endpoint.c_str(), &options) != 0) { VLOG(0) << "fl-ps > StartFlClientService failed. 
Try again."; - auto ip_port = paddle::string::Split(self_endpoint, ':'); + auto ip_port = ::paddle::string::Split(self_endpoint, ':'); std::string ip = ip_port[0]; int port = std::stoi(ip_port[1]); std::string int_ip_port = GetIntTypeEndpoint(ip, port); @@ -202,8 +206,7 @@ int32_t BrpcPsClient::InitializeFlWorker(const std::string &self_endpoint) { options.protocol = "baidu_std"; options.timeout_ms = FLAGS_pserver_timeout_ms; options.connection_type = "pooled"; - options.connect_timeout_ms = - paddle::distributed::FLAGS_pserver_connect_timeout_ms; + options.connect_timeout_ms = FLAGS_pserver_connect_timeout_ms; options.max_retry = 3; // 获取 coordinator 列表,并连接 std::string coordinator_ip_port; @@ -336,11 +339,11 @@ int32_t BrpcPsClient::Initialize() { auto table_id = worker_param.downpour_table_param(i).table_id(); if (type == PS_DENSE_TABLE) { _push_dense_task_queue_map[table_id] = - paddle::framework::MakeChannel(); + ::paddle::framework::MakeChannel(); } if (type == PS_SPARSE_TABLE) { _push_sparse_task_queue_map[table_id] = - paddle::framework::MakeChannel(); + ::paddle::framework::MakeChannel(); _push_sparse_merge_count_map[table_id] = 0; } } @@ -446,7 +449,7 @@ std::future BrpcPsClient::PrintTableStat(uint32_t table_id) { int ret = 0; uint64_t feasign_size = 0; uint64_t mf_size = 0; - paddle::framework::BinaryArchive ar; + ::paddle::framework::BinaryArchive ar; auto *closure = reinterpret_cast(done); for (size_t i = 0; i < request_call_num; ++i) { if (closure->check_response(i, PS_PRINT_TABLE_STAT) != 0) { diff --git a/paddle/fluid/distributed/ps/service/brpc_ps_server.cc b/paddle/fluid/distributed/ps/service/brpc_ps_server.cc index 84784c32f3b..28ac123fa08 100644 --- a/paddle/fluid/distributed/ps/service/brpc_ps_server.cc +++ b/paddle/fluid/distributed/ps/service/brpc_ps_server.cc @@ -30,15 +30,15 @@ class RpcController; } // namespace protobuf } // namespace google -DEFINE_int32(pserver_timeout_ms_s2s, - 10000, - "pserver request server timeout_ms"); -DEFINE_int32(pserver_connect_timeout_ms_s2s, - 10000, - "pserver connect server timeout_ms"); -DEFINE_string(pserver_connection_type_s2s, - "pooled", - "pserver connection_type[pooled:single]"); +PD_DEFINE_int32(pserver_timeout_ms_s2s, + 10000, + "pserver request server timeout_ms"); +PD_DEFINE_int32(pserver_connect_timeout_ms_s2s, + 10000, + "pserver connect server timeout_ms"); +PD_DEFINE_string(pserver_connection_type_s2s, + "pooled", + "pserver connection_type[pooled:single]"); namespace paddle { namespace distributed { @@ -169,7 +169,7 @@ int32_t BrpcPsServer::ReceiveFromPServer(int msg_type, LOG(WARNING) << "SERVER>>RESPONSE>>msg = 0 Finish S2S Response"; return 0; } - paddle::framework::BinaryArchive ar; + ::paddle::framework::BinaryArchive ar; ar.SetReadBuffer(const_cast(msg.c_str()), msg.length(), nullptr); if (ar.Cursor() == ar.Finish()) { LOG(WARNING) << "SERVER>>RESPONSE ar = 0>> Finish S2S Response"; @@ -598,7 +598,7 @@ int32_t BrpcPsService::PrintTableStat(Table *table, brpc::Controller *cntl) { CHECK_TABLE_EXIST(table, request, response) std::pair ret = table->PrintTableStat(); - paddle::framework::BinaryArchive ar; + ::paddle::framework::BinaryArchive ar; ar << ret.first << ret.second; std::string table_info(ar.Buffer(), ar.Length()); response.set_data(table_info); @@ -723,7 +723,7 @@ int32_t BrpcPsService::CacheShuffle(Table *table, table->Flush(); double cache_threshold = std::stod(request.params(2)); LOG(INFO) << "cache threshold for cache shuffle: " << cache_threshold; - // auto shuffled_ins = 
paddle::ps::make_channel>(); // shuffled_ins->set_block_size(80000); _server->StartS2S(); diff --git a/paddle/fluid/distributed/ps/service/communicator/communicator.cc b/paddle/fluid/distributed/ps/service/communicator/communicator.cc index 1ad58d9eb0f..9932343fa77 100644 --- a/paddle/fluid/distributed/ps/service/communicator/communicator.cc +++ b/paddle/fluid/distributed/ps/service/communicator/communicator.cc @@ -16,11 +16,11 @@ limitations under the License. */ #include -#include "gflags/gflags.h" #include "paddle/fluid/distributed/ps/service/brpc_ps_client.h" #include "paddle/fluid/distributed/ps/wrapper/fleet.h" #include "paddle/fluid/platform/profiler.h" #include "paddle/fluid/string/string_helper.h" +#include "paddle/utils/flags.h" #define LEARNING_RATE_DECAY_COUNTER "@LR_DECAY_COUNTER@" #define STEP_COUNTER "@PS_STEP_COUNTER@" @@ -42,7 +42,7 @@ Communicator::Communicator() = default; void Communicator::InitGFlag(const std::string &gflags) { VLOG(3) << "Init With Gflags:" << gflags; - std::vector flags = paddle::string::split_string(gflags); + std::vector flags = ::paddle::string::split_string(gflags); if (flags.empty()) { flags.push_back("-max_body_size=314217728"); flags.push_back("-bthread_concurrency=40"); @@ -57,7 +57,7 @@ void Communicator::InitGFlag(const std::string &gflags) { } int params_cnt = flags.size(); char **params_ptr = &(flags_ptr[0]); - ::GFLAGS_NAMESPACE::ParseCommandLineFlags(¶ms_cnt, ¶ms_ptr, true); + ::paddle::flags::ParseCommandLineFlags(¶ms_cnt, ¶ms_ptr); } std::once_flag Communicator::init_flag_; @@ -66,7 +66,7 @@ std::shared_ptr Communicator::communicator_(nullptr); void Communicator::InitBrpcClient( const std::string &dist_desc, const std::vector &host_sign_list) { - auto fleet = paddle::distributed::FleetWrapper::GetInstance(); + auto fleet = ::paddle::distributed::FleetWrapper::GetInstance(); if (_worker_ptr.get() == nullptr) { _worker_ptr = fleet->worker_ptr_; } @@ -92,7 +92,7 @@ void Communicator::RpcRecvDense(const std::vector &varnames, platform::RecordEvent record_event("Communicator->RpcRecvDense", platform::TracerEventType::Communication, 1); - std::vector regions; + std::vector<::paddle::distributed::Region> regions; regions.reserve(varnames.size()); for (auto &t : varnames) { Variable *var = scope->Var(t); @@ -103,7 +103,7 @@ void Communicator::RpcRecvDense(const std::vector &varnames, phi::DenseTensor *temp_tensor = temp_var->GetMutable(); temp_tensor->Resize(tensor->dims()); float *temp_data = temp_tensor->mutable_data(platform::CPUPlace()); - paddle::distributed::Region reg(temp_data, tensor->numel()); + ::paddle::distributed::Region reg(temp_data, tensor->numel()); regions.emplace_back(std::move(reg)); VLOG(1) << "Communicator::RpcRecvDense Var " << t << " table_id " << table_id << " Temp_data[0] " << temp_data[0] @@ -111,7 +111,7 @@ void Communicator::RpcRecvDense(const std::vector &varnames, #endif } else { float *w = tensor->mutable_data(tensor->place()); - paddle::distributed::Region reg(w, tensor->numel()); + ::paddle::distributed::Region reg(w, tensor->numel()); regions.emplace_back(std::move(reg)); } } @@ -152,7 +152,7 @@ void Communicator::RpcSendDenseParam(const std::vector &varnames, platform::TracerEventType::Communication, 1); auto place = platform::CPUPlace(); - std::vector regions; + std::vector<::paddle::distributed::Region> regions; for (auto &t : varnames) { Variable *var = scope.FindVar(t); CHECK(var != nullptr) << "var[" << t << "] not found"; @@ -164,7 +164,7 @@ void Communicator::RpcSendDenseParam(const std::vector 
&varnames, temp_tensor->Resize(tensor->dims()); float *temp_data = temp_tensor->mutable_data(platform::CPUPlace()); framework::TensorCopy(*tensor, platform::CPUPlace(), temp_tensor); - paddle::distributed::Region reg(temp_data, tensor->numel()); + ::paddle::distributed::Region reg(temp_data, tensor->numel()); regions.emplace_back(std::move(reg)); VLOG(1) << "rpc_send_dense_param Var " << t << " table_id " << table_id << " Temp_data[0] " << temp_data[0] << " Temp_data[-1] " @@ -172,7 +172,7 @@ void Communicator::RpcSendDenseParam(const std::vector &varnames, #endif } else { float *w = tensor->mutable_data(place); - paddle::distributed::Region reg(w, tensor->numel()); + ::paddle::distributed::Region reg(w, tensor->numel()); regions.emplace_back(reg); VLOG(1) << "rpc_send_dense_param Var " << t << " table_id " << table_id << " Temp_data[0] " << w[0] << " Temp_data[-1] " @@ -1096,10 +1096,10 @@ void GeoCommunicator::InitImpl(const RpcCtxMap &send_varname_to_ctx, parallel_task_nums_ += 1; sparse_id_queues_.insert( std::pair>>>( splited_var, - paddle::framework::MakeChannel< + ::paddle::framework::MakeChannel< std::shared_ptr>>(send_queue_size_))); } } @@ -1509,7 +1509,7 @@ void GeoCommunicator::MainThread() { void FLCommunicator::InitBrpcClient( const std::string &dist_desc, const std::vector &host_sign_list) { - auto fleet = paddle::distributed::FleetWrapper::GetInstance(); + auto fleet = ::paddle::distributed::FleetWrapper::GetInstance(); if (_worker_ptr.get() == nullptr) { VLOG(0) << "fl-ps > FLCommunicator::InitBrpcClient get _worker_ptr"; _worker_ptr = diff --git a/paddle/fluid/distributed/ps/service/communicator/communicator.h b/paddle/fluid/distributed/ps/service/communicator/communicator.h index 643c91b5b05..f6d062460b8 100644 --- a/paddle/fluid/distributed/ps/service/communicator/communicator.h +++ b/paddle/fluid/distributed/ps/service/communicator/communicator.h @@ -29,7 +29,6 @@ limitations under the License. */ #include #include -#include "gflags/gflags.h" #include "paddle/fluid/distributed/ps/service/communicator/communicator_common.h" #include "paddle/fluid/distributed/ps/service/coordinator_client.h" #include "paddle/fluid/distributed/ps/service/ps_client.h" @@ -45,6 +44,7 @@ limitations under the License. 
*/ #include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/funcs/selected_rows_functor.h" +#include "paddle/utils/flags.h" namespace paddle { namespace distributed { @@ -410,8 +410,8 @@ class Communicator { } void InitGFlag(const std::string &gflags); - paddle::distributed::PSParameter _ps_param; - paddle::distributed::PaddlePSEnvironment _ps_env; + ::paddle::distributed::PSParameter _ps_param; + ::paddle::distributed::PaddlePSEnvironment _ps_env; int servers_ = 0; int trainers_; int trainer_id_ = 0; @@ -661,7 +661,7 @@ class GeoCommunicator : public AsyncCommunicator { std::unordered_map< std::string, - paddle::framework::Channel>>> + ::paddle::framework::Channel>>> sparse_id_queues_; }; diff --git a/paddle/fluid/distributed/ps/service/coordinator_client.cc b/paddle/fluid/distributed/ps/service/coordinator_client.cc index 9a77170b37c..c9c2ba49c9b 100644 --- a/paddle/fluid/distributed/ps/service/coordinator_client.cc +++ b/paddle/fluid/distributed/ps/service/coordinator_client.cc @@ -28,8 +28,8 @@ static const int MAX_PORT = 65535; namespace paddle { namespace distributed { -DEFINE_uint64(total_fl_client_size, 100, "supported total fl client size"); -DEFINE_uint32(coordinator_wait_all_clients_max_time, 60, "uint32: s"); +PD_DEFINE_uint64(total_fl_client_size, 100, "supported total fl client size"); +PD_DEFINE_uint32(coordinator_wait_all_clients_max_time, 60, "uint32: s"); void CoordinatorService::FLService( ::google::protobuf::RpcController* controller, @@ -62,10 +62,10 @@ int32_t CoordinatorClient::Initialize( const std::vector& trainer_endpoints) { brpc::ChannelOptions options; options.protocol = "baidu_std"; - options.timeout_ms = paddle::distributed::FLAGS_pserver_timeout_ms; + options.timeout_ms = ::paddle::distributed::FLAGS_pserver_timeout_ms; options.connection_type = "pooled"; options.connect_timeout_ms = - paddle::distributed::FLAGS_pserver_connect_timeout_ms; + ::paddle::distributed::FLAGS_pserver_connect_timeout_ms; options.max_retry = 3; std::string server_ip_port; @@ -109,7 +109,7 @@ int32_t CoordinatorClient::Initialize( } for (size_t i = 0; i < trainer_endpoints.size(); i++) { std::vector addr = - paddle::string::Split(trainer_endpoints[i], ':'); + ::paddle::string::Split(trainer_endpoints[i], ':'); fl_client_list[i].ip = addr[0]; fl_client_list[i].port = std::stol(addr[1]); fl_client_list[i].rank = i; // TO CHECK @@ -152,7 +152,7 @@ int32_t CoordinatorClient::StartClientService() { LOG(ERROR) << "fl-ps > coordinator server endpoint not set"; return -1; } - auto addr = paddle::string::Split(_endpoint, ':'); + auto addr = ::paddle::string::Split(_endpoint, ':'); std::string ip = addr[0]; std::string port = addr[1]; std::string rank = addr[2]; diff --git a/paddle/fluid/distributed/ps/service/coordinator_client.h b/paddle/fluid/distributed/ps/service/coordinator_client.h index bd1f0f7754d..8db08c3fc79 100644 --- a/paddle/fluid/distributed/ps/service/coordinator_client.h +++ b/paddle/fluid/distributed/ps/service/coordinator_client.h @@ -34,10 +34,10 @@ namespace paddle { namespace distributed { -DECLARE_int32(pserver_timeout_ms); -DECLARE_int32(pserver_connect_timeout_ms); -DECLARE_uint64(total_fl_client_size); -DECLARE_uint32(coordinator_wait_all_clients_max_time); +PD_DECLARE_int32(pserver_timeout_ms); +PD_DECLARE_int32(pserver_connect_timeout_ms); +PD_DECLARE_uint64(total_fl_client_size); +PD_DECLARE_uint32(coordinator_wait_all_clients_max_time); using CoordinatorServiceFunc = std::function bool { while 
(query_wait_time < - paddle::distributed:: - FLAGS_coordinator_wait_all_clients_max_time) { // in case that - // some - // clients down + FLAGS_coordinator_wait_all_clients_max_time) { // in case that + // some + // clients down if (_is_all_clients_info_collected == true) { // LOG(INFO) << "fl-ps > _is_all_clients_info_collected"; return true; diff --git a/paddle/fluid/distributed/ps/service/env.h b/paddle/fluid/distributed/ps/service/env.h index aa230f86c9d..d6b40352349 100644 --- a/paddle/fluid/distributed/ps/service/env.h +++ b/paddle/fluid/distributed/ps/service/env.h @@ -25,8 +25,8 @@ #include #include -#include "gflags/gflags.h" #include "paddle/phi/core/macros.h" +#include "paddle/utils/flags.h" namespace paddle { namespace distributed { diff --git a/paddle/fluid/distributed/ps/service/heter_client.cc b/paddle/fluid/distributed/ps/service/heter_client.cc index 2ca9fef5c08..d7a1f5cf7c4 100644 --- a/paddle/fluid/distributed/ps/service/heter_client.cc +++ b/paddle/fluid/distributed/ps/service/heter_client.cc @@ -19,8 +19,8 @@ namespace paddle { namespace distributed { -DEFINE_int32(heter_world_size, 100, "group size"); // group max size -DEFINE_int32(switch_send_recv_timeout_s, 600, "switch_send_recv_timeout_s"); +PD_DEFINE_int32(heter_world_size, 100, "group size"); // group max size +PD_DEFINE_int32(switch_send_recv_timeout_s, 600, "switch_send_recv_timeout_s"); std::shared_ptr HeterClient::s_instance_ = nullptr; std::mutex HeterClient::mtx_; @@ -85,7 +85,7 @@ void HeterClient::CreateClient2XpuConnection() { xpu_channels_[i].reset(new brpc::Channel()); if (xpu_channels_[i]->Init(xpu_list_[i].c_str(), "", &options) != 0) { VLOG(0) << "HeterClient channel init fail. Try Again"; - auto ip_port = paddle::string::Split(xpu_list_[i], ':'); + auto ip_port = ::paddle::string::Split(xpu_list_[i], ':'); std::string ip = ip_port[0]; int port = std::stoi(ip_port[1]); std::string int_ip_port = GetIntTypeEndpoint(ip, port); @@ -100,7 +100,7 @@ void HeterClient::CreateClient2XpuConnection() { if (previous_xpu_channels_[i]->Init( previous_xpu_list_[i].c_str(), "", &options) != 0) { VLOG(0) << "HeterClient channel init fail. 
Try Again"; - auto ip_port = paddle::string::Split(previous_xpu_list_[i], ':'); + auto ip_port = ::paddle::string::Split(previous_xpu_list_[i], ':'); std::string ip = ip_port[0]; int port = std::stoi(ip_port[1]); std::string int_ip_port = GetIntTypeEndpoint(ip, port); @@ -181,11 +181,11 @@ void HeterClient::SendAndRecvAsync( std::future HeterClient::SendCmd( uint32_t table_id, int cmd_id, const std::vector& params) { size_t request_call_num = xpu_channels_.size(); - paddle::distributed::DownpourBrpcClosure* closure = - new paddle::distributed::DownpourBrpcClosure( + ::paddle::distributed::DownpourBrpcClosure* closure = + new ::paddle::distributed::DownpourBrpcClosure( request_call_num, [request_call_num, cmd_id](void* done) { int ret = 0; - auto* closure = (paddle::distributed::DownpourBrpcClosure*)done; + auto* closure = (::paddle::distributed::DownpourBrpcClosure*)done; for (size_t i = 0; i < request_call_num; ++i) { if (closure->check_response(i, cmd_id) != 0) { ret = -1; diff --git a/paddle/fluid/distributed/ps/service/heter_client.h b/paddle/fluid/distributed/ps/service/heter_client.h index 10d916b7100..e6c231338ac 100755 --- a/paddle/fluid/distributed/ps/service/heter_client.h +++ b/paddle/fluid/distributed/ps/service/heter_client.h @@ -42,7 +42,7 @@ class Scope; namespace paddle { namespace distributed { -DECLARE_int32(pserver_timeout_ms); +PD_DECLARE_int32(pserver_timeout_ms); using MultiVarMsg = ::paddle::distributed::MultiVariableMessage; using VarMsg = ::paddle::distributed::VariableMessage; @@ -116,7 +116,7 @@ class HeterClient { if ((*client_channels)[i]->Init(node_list[i].c_str(), "", &options) != 0) { VLOG(0) << "client channel init failed! try again"; - auto ip_port = paddle::string::Split(node_list[i], ':'); + auto ip_port = ::paddle::string::Split(node_list[i], ':'); std::string ip = ip_port[0]; int port = std::stoi(ip_port[1]); std::string int_ip_port = GetIntTypeEndpoint(ip, port); diff --git a/paddle/fluid/distributed/ps/service/heter_server.cc b/paddle/fluid/distributed/ps/service/heter_server.cc index d5d8546bf79..eb4d9b83045 100644 --- a/paddle/fluid/distributed/ps/service/heter_server.cc +++ b/paddle/fluid/distributed/ps/service/heter_server.cc @@ -18,8 +18,8 @@ namespace paddle { namespace distributed { -// DEFINE_string(cert_path, "./cert.pem", "cert.pem path"); -// DEFINE_string(key_path, "./key.pem", "key.pem path"); +// PD_DEFINE_string(cert_path, "./cert.pem", "cert.pem path"); +// PD_DEFINE_string(key_path, "./key.pem", "key.pem path"); std::shared_ptr HeterServer::s_instance_ = nullptr; std::mutex HeterServer::mtx_; @@ -37,7 +37,7 @@ void HeterServer::StartHeterService(bool neeed_encrypt) { } if (server_.Start(endpoint_.c_str(), &options) != 0) { VLOG(0) << "HeterServer start fail. Try again."; - auto ip_port = paddle::string::Split(endpoint_, ':'); + auto ip_port = ::paddle::string::Split(endpoint_, ':'); std::string ip = ip_port[0]; int port = std::stoi(ip_port[1]); std::string int_ip_port = GetIntTypeEndpoint(ip, port); @@ -72,7 +72,7 @@ void HeterServer::StartHeterInterService(bool neeed_encrypt) { } if (server_inter_.Start(endpoint_inter_.c_str(), &options) != 0) { VLOG(4) << "switch inter server start fail. 
Try again."; - auto ip_port = paddle::string::Split(endpoint_inter_, ':'); + auto ip_port = ::paddle::string::Split(endpoint_inter_, ':'); std::string ip = ip_port[0]; int port = std::stoi(ip_port[1]); std::string int_ip_port = GetIntTypeEndpoint(ip, port); diff --git a/paddle/fluid/distributed/ps/service/heter_server.h b/paddle/fluid/distributed/ps/service/heter_server.h index c4e9d05ac13..1f134d4db18 100755 --- a/paddle/fluid/distributed/ps/service/heter_server.h +++ b/paddle/fluid/distributed/ps/service/heter_server.h @@ -57,9 +57,9 @@ PHI_DECLARE_double(eager_delete_tensor_gb); namespace paddle { namespace distributed { -DECLARE_int32(pserver_timeout_ms); -DECLARE_int32(heter_world_size); -DECLARE_int32(switch_send_recv_timeout_s); +PD_DECLARE_int32(pserver_timeout_ms); +PD_DECLARE_int32(heter_world_size); +PD_DECLARE_int32(switch_send_recv_timeout_s); using MultiVarMsg = MultiVariableMessage; using VarMsg = VariableMessage; @@ -216,8 +216,8 @@ class SendAndRecvVariableHandler final : public ServiceHandlerBase { // get microID from request // deserialize variable to micro scope // Push to heter worker's task_queue - std::unique_ptr local_scope_ptr( - new paddle::framework::Scope()); + std::unique_ptr<::paddle::framework::Scope> local_scope_ptr( + new ::paddle::framework::Scope()); auto& local_scope = *(local_scope_ptr.get()); platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); platform::CPUPlace cpu_place; @@ -257,7 +257,7 @@ class SendAndRecvVariableHandler final : public ServiceHandlerBase { auto* minibatch_scope = &(scope_->NewScope()); (*mini_scopes_)[minibatch_index] = minibatch_scope; (*micro_scopes_)[minibatch_index].reset( - new std::vector{}); + new std::vector<::paddle::framework::Scope*>{}); for (int i = 0; i < num_microbatch_; i++) { auto* micro_scope = &(minibatch_scope->NewScope()); (*((*micro_scopes_)[minibatch_index])).push_back(micro_scope); @@ -300,7 +300,7 @@ class SendAndRecvVariableHandler final : public ServiceHandlerBase { public: using shard_type = SparseTableShard; - std::shared_ptr local_scope_ptr; // for switch + std::shared_ptr<::paddle::framework::Scope> local_scope_ptr; // for switch std::unordered_map> vars_ready_flag; std::unique_ptr _local_shards; @@ -344,7 +344,7 @@ class HeterService : public PsService { std::placeholders::_3); service_handler_.local_scope_ptr = - std::make_shared(); + std::make_shared<::paddle::framework::Scope>(); } virtual ~HeterService() {} @@ -613,7 +613,7 @@ class HeterServer { void SetLocalScope() { request_handler_->local_scope_ptr = - std::make_shared(); + std::make_shared<::paddle::framework::Scope>(); } void SetInterEndpoint(const std::string& endpoint) { diff --git a/paddle/fluid/distributed/ps/service/ps_client.cc b/paddle/fluid/distributed/ps/service/ps_client.cc index 3dd2b8c775c..ecdcd8b8743 100644 --- a/paddle/fluid/distributed/ps/service/ps_client.cc +++ b/paddle/fluid/distributed/ps/service/ps_client.cc @@ -37,7 +37,8 @@ REGISTER_PSCORE_CLASS(PSClient, PsGraphClient); int32_t PSClient::Configure( // called in FleetWrapper::InitWorker const PSParameter &config, - const std::map> ®ions, + const std::map> + ®ions, PSEnvironment &env, size_t client_id) { _env = &env; @@ -88,7 +89,7 @@ PSClient *PSClientFactory::Create(const PSParameter &ps_config) { PSClient *client = NULL; #if defined(PADDLE_WITH_GLOO) && defined(PADDLE_WITH_GPU_GRAPH) - auto gloo = paddle::framework::GlooWrapper::GetInstance(); + auto gloo = ::paddle::framework::GlooWrapper::GetInstance(); if (client_name == "PsLocalClient" && 
gloo->Size() > 1) { client = CREATE_PSCORE_CLASS(PSClient, "PsGraphClient"); LOG(WARNING) << "change PsLocalClient to PsGraphClient"; diff --git a/paddle/fluid/distributed/ps/service/ps_service/graph_py_service.h b/paddle/fluid/distributed/ps/service/ps_service/graph_py_service.h index 33490281981..44836e7661b 100644 --- a/paddle/fluid/distributed/ps/service/ps_service/graph_py_service.h +++ b/paddle/fluid/distributed/ps/service/ps_service/graph_py_service.h @@ -143,13 +143,13 @@ class GraphPyServer : public GraphPyService { void start_server(bool block = true); ::paddle::distributed::PSParameter GetServerProto(); - std::shared_ptr get_ps_server() { + std::shared_ptr<::paddle::distributed::GraphBrpcServer> get_ps_server() { return pserver_ptr; } protected: int rank; - std::shared_ptr pserver_ptr; + std::shared_ptr<::paddle::distributed::GraphBrpcServer> pserver_ptr; std::thread* server_thread; }; class GraphPyClient : public GraphPyService { @@ -162,14 +162,14 @@ class GraphPyClient : public GraphPyService { set_client_id(client_id); GraphPyService::set_up(ips_str, shard_num, node_types, edge_types); } - std::shared_ptr get_ps_client() { + std::shared_ptr<::paddle::distributed::GraphBrpcClient> get_ps_client() { return worker_ptr; } void bind_local_server(int local_channel_index, GraphPyServer& server) { // NOLINT worker_ptr->set_local_channel(local_channel_index); worker_ptr->set_local_graph_service( - (paddle::distributed::GraphBrpcService*)server.get_ps_server() + (::paddle::distributed::GraphBrpcService*)server.get_ps_server() ->get_service()); } void StopServer(); @@ -209,7 +209,7 @@ class GraphPyClient : public GraphPyService { protected: mutable std::mutex mutex_; int client_id; - std::shared_ptr worker_ptr; + std::shared_ptr<::paddle::distributed::GraphBrpcClient> worker_ptr; std::thread* client_thread; bool stoped_ = false; }; diff --git a/paddle/fluid/distributed/ps/service/ps_service/service.cc b/paddle/fluid/distributed/ps/service/ps_service/service.cc index d62cdb4c133..e66475e88d8 100644 --- a/paddle/fluid/distributed/ps/service/ps_service/service.cc +++ b/paddle/fluid/distributed/ps/service/ps_service/service.cc @@ -28,9 +28,9 @@ using namespace std; // NOLINT namespace paddle { namespace distributed { -paddle::distributed::PSParameter load_from_prototxt( +::paddle::distributed::PSParameter load_from_prototxt( const std::string& filename) { - paddle::distributed::PSParameter param; + ::paddle::distributed::PSParameter param; int file_descriptor = open(filename.c_str(), O_RDONLY); if (file_descriptor == -1) { @@ -50,7 +50,7 @@ paddle::distributed::PSParameter load_from_prototxt( void PSCore::InitGFlag(const std::string& gflags) { VLOG(3) << "Init With Gflags:" << gflags; - std::vector flags = paddle::string::split_string(gflags); + std::vector flags = ::paddle::string::split_string(gflags); if (flags.empty()) { flags.push_back("-max_body_size=314217728"); flags.push_back("-socket_max_unwritten_bytes=2048000000"); @@ -64,7 +64,7 @@ void PSCore::InitGFlag(const std::string& gflags) { } int params_cnt = flags.size(); char** params_ptr = &(flags_ptr[0]); - ::GFLAGS_NAMESPACE::ParseCommandLineFlags(¶ms_cnt, ¶ms_ptr, true); + ::paddle::flags::ParseCommandLineFlags(¶ms_cnt, ¶ms_ptr); } int PSCore::InitServer( @@ -76,12 +76,12 @@ int PSCore::InitServer( const std::vector& server_sub_program) { google::protobuf::TextFormat::ParseFromString(dist_desc, &_ps_param); InitGFlag(_ps_param.init_gflags()); - _ps_env = paddle::distributed::PaddlePSEnvironment(); + _ps_env = 
::paddle::distributed::PaddlePSEnvironment(); _ps_env.SetPsServers(host_sign_list, node_num); _ps_env.SetTrainers(trainers); int ret = 0; - _server_ptr = std::shared_ptr( - paddle::distributed::PSServerFactory::Create(_ps_param)); + _server_ptr = std::shared_ptr<::paddle::distributed::PSServer>( + ::paddle::distributed::PSServerFactory::Create(_ps_param)); ret = _server_ptr->Configure(_ps_param, _ps_env, index, server_sub_program); CHECK(ret == 0) << "failed to configure server"; return ret; @@ -89,13 +89,14 @@ int PSCore::InitServer( int PSCore::InitWorker( const std::string& dist_desc, - const std::map>& regions, + const std::map>& + regions, const std::vector* host_sign_list, int node_num, int index) { google::protobuf::TextFormat::ParseFromString(dist_desc, &_ps_param); InitGFlag(_ps_param.init_gflags()); - _ps_env = paddle::distributed::PaddlePSEnvironment(); + _ps_env = ::paddle::distributed::PaddlePSEnvironment(); _ps_env.SetPsServers(host_sign_list, node_num); int ret = 0; VLOG(1) << "PSCore::InitWorker"; @@ -132,6 +133,6 @@ int PSCore::StopServer() { stop_status.wait(); return 0; } -paddle::distributed::PSParameter* PSCore::GetParam() { return &_ps_param; } +::paddle::distributed::PSParameter* PSCore::GetParam() { return &_ps_param; } } // namespace distributed } // namespace paddle diff --git a/paddle/fluid/distributed/ps/service/ps_service/service.h b/paddle/fluid/distributed/ps/service/ps_service/service.h index eb190073fbd..4c3c6db61e2 100644 --- a/paddle/fluid/distributed/ps/service/ps_service/service.h +++ b/paddle/fluid/distributed/ps/service/ps_service/service.h @@ -33,9 +33,9 @@ class PsRequestMessage; class PsResponseMessage; class PsService; -using paddle::distributed::PsRequestMessage; -using paddle::distributed::PsResponseMessage; -using paddle::distributed::PsService; +using ::paddle::distributed::PsRequestMessage; +using ::paddle::distributed::PsResponseMessage; +using ::paddle::distributed::PsService; class PSCore { public: @@ -51,7 +51,7 @@ class PSCore { const std::vector& server_sub_program = {}); virtual int InitWorker( const std::string& dist_desc, - const std::map>& + const std::map>& regions, const std::vector* host_sign_list, int node_num, @@ -63,16 +63,16 @@ class PSCore { virtual int CreateClient2ClientConnection(int pserver_timeout_ms, int pserver_connect_timeout_ms, int max_retry); - std::shared_ptr + std::shared_ptr<::paddle::distributed::PSServer> _server_ptr; // pointer to server - std::shared_ptr + std::shared_ptr<::paddle::distributed::PSClient> _worker_ptr; // pointer to worker - virtual paddle::distributed::PSParameter* GetParam(); + virtual ::paddle::distributed::PSParameter* GetParam(); private: void InitGFlag(const std::string& gflags); - paddle::distributed::PSParameter _ps_param; - paddle::distributed::PaddlePSEnvironment _ps_env; + ::paddle::distributed::PSParameter _ps_param; + ::paddle::distributed::PaddlePSEnvironment _ps_env; }; } // namespace distributed diff --git a/paddle/fluid/distributed/ps/service/server.h b/paddle/fluid/distributed/ps/service/server.h index 48b32d22cac..fc1d4a2bd34 100644 --- a/paddle/fluid/distributed/ps/service/server.h +++ b/paddle/fluid/distributed/ps/service/server.h @@ -57,8 +57,8 @@ namespace distributed { class Table; -using paddle::distributed::PsRequestMessage; -using paddle::distributed::PsResponseMessage; +using ::paddle::distributed::PsRequestMessage; +using ::paddle::distributed::PsResponseMessage; class PSServer { public: @@ -134,7 +134,7 @@ class PSServer { return -1; } - 
paddle::framework::Channel> _shuffled_ins; + ::paddle::framework::Channel> _shuffled_ins; protected: virtual int32_t Initialize() = 0; diff --git a/paddle/fluid/distributed/ps/service/simple_rpc/baidu_rpc_server.cc b/paddle/fluid/distributed/ps/service/simple_rpc/baidu_rpc_server.cc index f3e501dd00c..a10e78fe941 100644 --- a/paddle/fluid/distributed/ps/service/simple_rpc/baidu_rpc_server.cc +++ b/paddle/fluid/distributed/ps/service/simple_rpc/baidu_rpc_server.cc @@ -19,8 +19,8 @@ #include "paddle/phi/core/enforce.h" namespace brpc { -DECLARE_uint64(max_body_size); -DECLARE_int64(socket_max_unwritten_bytes); +PD_DECLARE_uint64(max_body_size); +PD_DECLARE_int64(socket_max_unwritten_bytes); } // namespace brpc namespace paddle { diff --git a/paddle/fluid/distributed/ps/table/common_graph_table.cc b/paddle/fluid/distributed/ps/table/common_graph_table.cc index 316b2295c33..153c67317d5 100644 --- a/paddle/fluid/distributed/ps/table/common_graph_table.cc +++ b/paddle/fluid/distributed/ps/table/common_graph_table.cc @@ -58,14 +58,14 @@ int32_t GraphTable::Load_to_ssd(const std::string &path, return 0; } -paddle::framework::GpuPsCommGraphFea GraphTable::make_gpu_ps_graph_fea( +::paddle::framework::GpuPsCommGraphFea GraphTable::make_gpu_ps_graph_fea( int gpu_id, std::vector &node_ids, int slot_num) { size_t shard_num = 64; std::vector> bags(shard_num); std::vector feature_array[shard_num]; std::vector slot_id_array[shard_num]; std::vector node_id_array[shard_num]; - std::vector node_fea_info_array[shard_num]; + std::vector<::paddle::framework::GpuPsFeaInfo> node_fea_info_array[shard_num]; for (size_t i = 0; i < shard_num; i++) { auto predsize = node_ids.size() / shard_num; bags[i].reserve(predsize * 1.2); @@ -92,7 +92,7 @@ paddle::framework::GpuPsCommGraphFea GraphTable::make_gpu_ps_graph_fea( if (bags[i].size() > 0) { tasks.push_back(_cpu_worker_pool[gpu_id]->enqueue([&, i, this]() -> int { uint64_t node_id; - paddle::framework::GpuPsFeaInfo x; + ::paddle::framework::GpuPsFeaInfo x; std::vector feature_ids; for (size_t j = 0; j < bags[i].size(); j++) { Node *v = find_node(GraphTableType::FEATURE_TABLE, bags[i][j]); @@ -134,7 +134,7 @@ paddle::framework::GpuPsCommGraphFea GraphTable::make_gpu_ps_graph_fea( tasks.clear(); - paddle::framework::GpuPsCommGraphFea res; + ::paddle::framework::GpuPsCommGraphFea res; uint64_t tot_len = 0; for (size_t i = 0; i < shard_num; i++) { tot_len += feature_array[i].size(); @@ -165,7 +165,7 @@ paddle::framework::GpuPsCommGraphFea GraphTable::make_gpu_ps_graph_fea( return res; } -paddle::framework::GpuPsCommGraph GraphTable::make_gpu_ps_graph( +::paddle::framework::GpuPsCommGraph GraphTable::make_gpu_ps_graph( int idx, const std::vector &ids) { std::vector> bags(task_pool_size_); for (int i = 0; i < task_pool_size_; i++) { @@ -179,7 +179,7 @@ paddle::framework::GpuPsCommGraph GraphTable::make_gpu_ps_graph( std::vector> tasks; std::vector node_array[task_pool_size_]; // node id list - std::vector info_array[task_pool_size_]; + std::vector<::paddle::framework::GpuPsNodeInfo> info_array[task_pool_size_]; std::vector edge_array[task_pool_size_]; // edge id list for (size_t i = 0; i < bags.size(); i++) { @@ -215,7 +215,7 @@ paddle::framework::GpuPsCommGraph GraphTable::make_gpu_ps_graph( tot_len += edge_array[i].size(); } - paddle::framework::GpuPsCommGraph res; + ::paddle::framework::GpuPsCommGraph res; res.init_on_cpu(tot_len, ids.size()); int64_t offset = 0, ind = 0; for (int i = 0; i < task_pool_size_; i++) { @@ -516,13 +516,13 @@ void GraphTable::release_graph() { 
build_graph_type_keys(); if (FLAGS_gpugraph_storage_mode == - paddle::framework::GpuGraphStorageMode::WHOLE_HBM) { + ::paddle::framework::GpuGraphStorageMode::WHOLE_HBM) { build_graph_total_keys(); } // clear graph - if (FLAGS_gpugraph_storage_mode == paddle::framework::GpuGraphStorageMode:: + if (FLAGS_gpugraph_storage_mode == ::paddle::framework::GpuGraphStorageMode:: MEM_EMB_FEATURE_AND_GPU_GRAPH || - FLAGS_gpugraph_storage_mode == paddle::framework::GpuGraphStorageMode:: + FLAGS_gpugraph_storage_mode == ::paddle::framework::GpuGraphStorageMode:: SSD_EMB_AND_MEM_FEATURE_GPU_GRAPH) { clear_edge_shard(); } else { @@ -532,7 +532,7 @@ void GraphTable::release_graph() { void GraphTable::release_graph_edge() { if (FLAGS_gpugraph_storage_mode == - paddle::framework::GpuGraphStorageMode::WHOLE_HBM) { + ::paddle::framework::GpuGraphStorageMode::WHOLE_HBM) { build_graph_total_keys(); } clear_edge_shard(); @@ -543,10 +543,12 @@ void GraphTable::release_graph_node() { if (FLAGS_graph_metapath_split_opt) { clear_feature_shard(); } else { - if (FLAGS_gpugraph_storage_mode != paddle::framework::GpuGraphStorageMode:: - MEM_EMB_FEATURE_AND_GPU_GRAPH && - FLAGS_gpugraph_storage_mode != paddle::framework::GpuGraphStorageMode:: - SSD_EMB_AND_MEM_FEATURE_GPU_GRAPH) { + if (FLAGS_gpugraph_storage_mode != + ::paddle::framework::GpuGraphStorageMode:: + MEM_EMB_FEATURE_AND_GPU_GRAPH && + FLAGS_gpugraph_storage_mode != + ::paddle::framework::GpuGraphStorageMode:: + SSD_EMB_AND_MEM_FEATURE_GPU_GRAPH) { clear_feature_shard(); } else { merge_feature_shard(); @@ -666,7 +668,7 @@ int32_t GraphTable::load_edges_to_ssd(const std::string &path, idx = edge_to_id[edge_type]; } total_memory_cost = 0; - auto paths = paddle::string::split_string(path, ";"); + auto paths = ::paddle::string::split_string(path, ";"); int64_t count = 0; std::string sample_type = "random"; for (auto path : paths) { @@ -674,11 +676,12 @@ int32_t GraphTable::load_edges_to_ssd(const std::string &path, std::string line; while (std::getline(file, line)) { VLOG(0) << "get a line from file " << line; - auto values = paddle::string::split_string(line, "\t"); + auto values = ::paddle::string::split_string(line, "\t"); count++; if (values.size() < 2) continue; auto src_id = std::stoll(values[0]); - auto dist_ids = paddle::string::split_string(values[1], ";"); + auto dist_ids = + ::paddle::string::split_string(values[1], ";"); std::vector dist_data; for (auto x : dist_ids) { dist_data.push_back(std::stoll(x)); @@ -798,7 +801,7 @@ int CompleteGraphSampler::run_graph_sampling() { sample_nodes.resize(gpu_num); sample_neighbors.resize(gpu_num); sample_res.resize(gpu_num); - std::vector>> + std::vector>> sample_nodes_ex(graph_table->task_pool_size_); std::vector>> sample_neighbors_ex( graph_table->task_pool_size_); @@ -812,7 +815,7 @@ int CompleteGraphSampler::run_graph_sampling() { graph_table->_shards_task_pool[i % graph_table->task_pool_size_] ->enqueue([&, i, this]() -> int { if (this->status == GraphSamplerStatus::terminating) return 0; - paddle::framework::GpuPsGraphNode node; + ::paddle::framework::GpuPsGraphNode node; std::vector &v = this->graph_table->shards[i]->get_bucket(); size_t ind = i % this->graph_table->task_pool_size_; @@ -962,7 +965,7 @@ int BasicBfsGraphSampler::run_graph_sampling() { sample_nodes.resize(gpu_num); sample_neighbors.resize(gpu_num); sample_res.resize(gpu_num); - std::vector>> + std::vector>> sample_nodes_ex(graph_table->task_pool_size_); std::vector>> sample_neighbors_ex( graph_table->task_pool_size_); @@ -977,7 +980,7 @@ int 
BasicBfsGraphSampler::run_graph_sampling() { if (this->status == GraphSamplerStatus::terminating) { return 0; } - paddle::framework::GpuPsGraphNode node; + ::paddle::framework::GpuPsGraphNode node; auto iter = sample_neighbors_map[i].begin(); size_t ind = i; for (; iter != sample_neighbors_map[i].end(); iter++) { @@ -1237,7 +1240,7 @@ int32_t GraphTable::Load(const std::string &path, const std::string ¶m) { } std::string GraphTable::get_inverse_etype(std::string &etype) { - auto etype_split = paddle::string::split_string(etype, "2"); + auto etype_split = ::paddle::string::split_string(etype, "2"); std::string res; if (etype_split.size() == 3) { res = etype_split[2] + "2" + etype_split[1] + "2" + etype_split[0]; @@ -1253,13 +1256,13 @@ int32_t GraphTable::parse_type_to_typepath( std::vector &res_type, std::unordered_map &res_type2path) { auto type2files_split = - paddle::string::split_string(type2files, ","); + ::paddle::string::split_string(type2files, ","); if (type2files_split.empty()) { return -1; } for (auto one_type2file : type2files_split) { auto one_type2file_split = - paddle::string::split_string(one_type2file, ":"); + ::paddle::string::split_string(one_type2file, ":"); auto type = one_type2file_split[0]; auto type_dir = one_type2file_split[1]; res_type.push_back(type); @@ -1304,17 +1307,17 @@ int32_t GraphTable::parse_edge_and_load( VLOG(1) << "only_load_reverse_edge is False, etype[" << etypes[i] << "], file_path[" << etype_path << "]"; } - auto etype_path_list = paddle::framework::localfs_list(etype_path); + auto etype_path_list = ::paddle::framework::localfs_list(etype_path); std::string etype_path_str; if (part_num > 0 && part_num < static_cast(etype_path_list.size())) { std::vector sub_etype_path_list( etype_path_list.begin(), etype_path_list.begin() + part_num); etype_path_str = - paddle::string::join_strings(sub_etype_path_list, delim); + ::paddle::string::join_strings(sub_etype_path_list, delim); } else { etype_path_str = - paddle::string::join_strings(etype_path_list, delim); + ::paddle::string::join_strings(etype_path_list, delim); } if (!only_load_reverse_edge) { this->load_edges(etype_path_str, false, etypes[i]); @@ -1345,14 +1348,14 @@ int32_t GraphTable::parse_node_and_load(std::string ntype2files, } std::string delim = ";"; std::string npath = node_to_nodedir[ntypes[0]]; - auto npath_list = paddle::framework::localfs_list(npath); + auto npath_list = ::paddle::framework::localfs_list(npath); std::string npath_str; if (part_num > 0 && part_num < static_cast(npath_list.size())) { std::vector sub_npath_list(npath_list.begin(), npath_list.begin() + part_num); - npath_str = paddle::string::join_strings(sub_npath_list, delim); + npath_str = ::paddle::string::join_strings(sub_npath_list, delim); } else { - npath_str = paddle::string::join_strings(npath_list, delim); + npath_str = ::paddle::string::join_strings(npath_list, delim); } if (ntypes.empty()) { @@ -1425,17 +1428,18 @@ int32_t GraphTable::load_node_and_edge_file( VLOG(1) << "only_load_reverse_edge is False, etype[" << etypes[i] << "], file_path[" << etype_path << "]"; } - auto etype_path_list = paddle::framework::localfs_list(etype_path); + auto etype_path_list = + ::paddle::framework::localfs_list(etype_path); std::string etype_path_str; if (part_num > 0 && part_num < static_cast(etype_path_list.size())) { std::vector sub_etype_path_list( etype_path_list.begin(), etype_path_list.begin() + part_num); etype_path_str = - paddle::string::join_strings(sub_etype_path_list, delim); + 
::paddle::string::join_strings(sub_etype_path_list, delim); } else { etype_path_str = - paddle::string::join_strings(etype_path_list, delim); + ::paddle::string::join_strings(etype_path_list, delim); } if (!only_load_reverse_edge) { this->load_edges(etype_path_str, false, etypes[i]); @@ -1448,15 +1452,15 @@ int32_t GraphTable::load_node_and_edge_file( } } else { std::string npath = node_to_nodedir[ntypes[0]]; - auto npath_list = paddle::framework::localfs_list(npath); + auto npath_list = ::paddle::framework::localfs_list(npath); std::string npath_str; if (part_num > 0 && part_num < static_cast(npath_list.size())) { std::vector sub_npath_list( npath_list.begin(), npath_list.begin() + part_num); - npath_str = paddle::string::join_strings(sub_npath_list, delim); + npath_str = ::paddle::string::join_strings(sub_npath_list, delim); } else { - npath_str = paddle::string::join_strings(npath_list, delim); + npath_str = ::paddle::string::join_strings(npath_list, delim); } if (ntypes.empty()) { @@ -1553,14 +1557,14 @@ std::pair GraphTable::parse_node_file( uint64_t local_valid_count = 0; int num = 0; - std::vector vals; + std::vector<::paddle::string::str_ptr> vals; size_t n = node_type.length(); while (std::getline(file, line)) { if (strncmp(line.c_str(), node_type.c_str(), n) != 0) { continue; } vals.clear(); - num = paddle::string::split_string_ptr( + num = ::paddle::string::split_string_ptr( line.c_str() + n + 1, line.length() - n - 1, '\t', &vals); if (num == 0) { continue; @@ -1603,15 +1607,15 @@ std::pair GraphTable::parse_node_file( uint64_t local_valid_count = 0; int idx = 0; - auto path_split = paddle::string::split_string(path, "/"); + auto path_split = ::paddle::string::split_string(path, "/"); auto path_name = path_split[path_split.size() - 1]; int num = 0; - std::vector vals; + std::vector<::paddle::string::str_ptr> vals; while (std::getline(file, line)) { vals.clear(); - num = paddle::string::split_string_ptr( + num = ::paddle::string::split_string_ptr( line.c_str(), line.length(), '\t', &vals); if (vals.empty()) { continue; @@ -1654,7 +1658,7 @@ std::pair GraphTable::parse_node_file( // // TODO(danleifeng): opt load all node_types in once reading int32_t GraphTable::load_nodes(const std::string &path, std::string node_type) { - auto paths = paddle::string::split_string(path, ";"); + auto paths = ::paddle::string::split_string(path, ";"); uint64_t count = 0; uint64_t valid_count = 0; int idx = 0; @@ -1725,8 +1729,8 @@ std::pair GraphTable::parse_edge_file( uint64_t local_valid_count = 0; uint64_t part_num = 0; if (FLAGS_graph_load_in_parallel) { - auto path_split = paddle::string::split_string(path, "/"); - auto part_name_split = paddle::string::split_string( + auto path_split = ::paddle::string::split_string(path, "/"); + auto part_name_split = ::paddle::string::split_string( path_split[path_split.size() - 1], "-"); part_num = std::stoull(part_name_split[part_name_split.size() - 1]); } @@ -1793,7 +1797,7 @@ int32_t GraphTable::load_edges(const std::string &path, idx = edge_to_id[edge_type]; } - auto paths = paddle::string::split_string(path, ";"); + auto paths = ::paddle::string::split_string(path, ";"); uint64_t count = 0; uint64_t valid_count = 0; @@ -1865,7 +1869,7 @@ Node *GraphTable::find_node(GraphTableType table_type, uint64_t id) { table_type == GraphTableType::EDGE_TABLE ? 
edge_shards : feature_shards; for (auto &search_shard : search_shards) { PADDLE_ENFORCE_NOT_NULL(search_shard[index], - paddle::platform::errors::InvalidArgument( + ::paddle::platform::errors::InvalidArgument( "search_shard[%d] should not be null.", index)); node = search_shard[index]->find_node(id); if (node != nullptr) { @@ -1885,7 +1889,7 @@ Node *GraphTable::find_node(GraphTableType table_type, int idx, uint64_t id) { ? edge_shards[idx] : feature_shards[idx]; PADDLE_ENFORCE_NOT_NULL(search_shards[index], - paddle::platform::errors::InvalidArgument( + ::paddle::platform::errors::InvalidArgument( "search_shard[%d] should not be null.", index)); Node *node = search_shards[index]->find_node(id); return node; @@ -2164,8 +2168,8 @@ void string_vector_2_string(std::vector::iterator strs_begin, } void string_vector_2_string( - std::vector::iterator strs_begin, - std::vector::iterator strs_end, + std::vector<::paddle::string::str_ptr>::iterator strs_begin, + std::vector<::paddle::string::str_ptr>::iterator strs_end, char delim, std::string *output) { size_t i = 0; @@ -2184,19 +2188,19 @@ int GraphTable::parse_feature(int idx, FeatureNode *node) { // Return (feat_id, btyes) if name are in this->feat_name, else return (-1, // "") - thread_local std::vector fields; + thread_local std::vector<::paddle::string::str_ptr> fields; fields.clear(); char c = slot_feature_separator_.at(0); - paddle::string::split_string_ptr(feat_str, len, c, &fields); + ::paddle::string::split_string_ptr(feat_str, len, c, &fields); - thread_local std::vector fea_fields; + thread_local std::vector<::paddle::string::str_ptr> fea_fields; fea_fields.clear(); c = feature_separator_.at(0); - paddle::string::split_string_ptr(fields[1].ptr, - fields[1].len, - c, - &fea_fields, - FLAGS_gpugraph_slot_feasign_max_num); + ::paddle::string::split_string_ptr(fields[1].ptr, + fields[1].len, + c, + &fea_fields, + FLAGS_gpugraph_slot_feasign_max_num); std::string name = fields[0].to_string(); auto it = feat_id_map[idx].find(name); if (it != feat_id_map[idx].end()) { @@ -2522,14 +2526,14 @@ int32_t GraphTable::Initialize(const TableParameter &config, } void GraphTable::load_node_weight(int type_id, int idx, std::string path) { - auto paths = paddle::string::split_string(path, ";"); + auto paths = ::paddle::string::split_string(path, ";"); int64_t count = 0; auto &weight_map = node_weight[type_id][idx]; for (auto path : paths) { std::ifstream file(path); std::string line; while (std::getline(file, line)) { - auto values = paddle::string::split_string(line, "\t"); + auto values = ::paddle::string::split_string(line, "\t"); count++; if (values.size() < 2) continue; auto src_id = std::stoull(values[0]); @@ -2546,7 +2550,7 @@ int32_t GraphTable::Initialize(const GraphParameter &graph) { _db = NULL; search_level = graph.search_level(); if (search_level >= 2) { - _db = paddle::distributed::RocksDBHandler::GetInstance(); + _db = ::paddle::distributed::RocksDBHandler::GetInstance(); _db->initialize("./temp_gpups_db", task_pool_size_); } // gpups_mode = true; diff --git a/paddle/fluid/distributed/ps/table/common_graph_table.h b/paddle/fluid/distributed/ps/table/common_graph_table.h index 14232736828..91967ccb07c 100644 --- a/paddle/fluid/distributed/ps/table/common_graph_table.h +++ b/paddle/fluid/distributed/ps/table/common_graph_table.h @@ -712,9 +712,9 @@ class GraphTable : public Table { int &actual_size); // NOLINT virtual int32_t add_node_to_ssd( int type_id, int idx, uint64_t src_id, char *data, int len); - virtual 
paddle::framework::GpuPsCommGraph make_gpu_ps_graph( + virtual ::paddle::framework::GpuPsCommGraph make_gpu_ps_graph( int idx, const std::vector &ids); - virtual paddle::framework::GpuPsCommGraphFea make_gpu_ps_graph_fea( + virtual ::paddle::framework::GpuPsCommGraphFea make_gpu_ps_graph_fea( int gpu_id, std::vector &node_ids, int slot_num); // NOLINT int32_t Load_to_ssd(const std::string &path, const std::string ¶m); int64_t load_graph_to_memory_from_ssd(int idx, @@ -786,7 +786,7 @@ class GraphTable : public Table { std::shared_ptr rw_lock; #ifdef PADDLE_WITH_HETERPS // paddle::framework::GpuPsGraphTable gpu_graph_table; - paddle::distributed::RocksDBHandler *_db; + ::paddle::distributed::RocksDBHandler *_db; // std::shared_ptr<::ThreadPool> graph_sample_pool; // std::shared_ptr graph_sampler; // REGISTER_GRAPH_FRIEND_CLASS(2, CompleteGraphSampler, BasicBfsGraphSampler) @@ -847,8 +847,8 @@ class BasicBfsGraphSampler : public GraphSampler { namespace std { template <> -struct hash { - size_t operator()(const paddle::distributed::SampleKey &s) const { +struct hash<::paddle::distributed::SampleKey> { + size_t operator()(const ::paddle::distributed::SampleKey &s) const { return s.idx ^ s.node_key ^ s.sample_size; } }; diff --git a/paddle/fluid/distributed/ps/table/ctr_accessor.cc b/paddle/fluid/distributed/ps/table/ctr_accessor.cc index 61e748a5413..ca634572a46 100644 --- a/paddle/fluid/distributed/ps/table/ctr_accessor.cc +++ b/paddle/fluid/distributed/ps/table/ctr_accessor.cc @@ -14,10 +14,9 @@ #include "paddle/fluid/distributed/ps/table/ctr_accessor.h" -#include - #include "glog/logging.h" #include "paddle/fluid/string/string_helper.h" +#include "paddle/utils/flags.h" namespace paddle { namespace distributed { diff --git a/paddle/fluid/distributed/ps/table/ctr_double_accessor.cc b/paddle/fluid/distributed/ps/table/ctr_double_accessor.cc index 35c27242fe3..46d3ebf400d 100644 --- a/paddle/fluid/distributed/ps/table/ctr_double_accessor.cc +++ b/paddle/fluid/distributed/ps/table/ctr_double_accessor.cc @@ -14,10 +14,9 @@ #include "paddle/fluid/distributed/ps/table/ctr_double_accessor.h" -#include - #include "glog/logging.h" #include "paddle/fluid/string/string_helper.h" +#include "paddle/utils/flags.h" namespace paddle { namespace distributed { diff --git a/paddle/fluid/distributed/ps/table/ctr_dymf_accessor.cc b/paddle/fluid/distributed/ps/table/ctr_dymf_accessor.cc index 4824ab8946b..a2943c2237c 100644 --- a/paddle/fluid/distributed/ps/table/ctr_dymf_accessor.cc +++ b/paddle/fluid/distributed/ps/table/ctr_dymf_accessor.cc @@ -14,10 +14,9 @@ #include "paddle/fluid/distributed/ps/table/ctr_dymf_accessor.h" -#include - #include "glog/logging.h" #include "paddle/fluid/string/string_helper.h" +#include "paddle/utils/flags.h" namespace paddle { namespace distributed { diff --git a/paddle/fluid/distributed/ps/table/depends/dense.h b/paddle/fluid/distributed/ps/table/depends/dense.h index 3f09376b42d..272fb0b0050 100644 --- a/paddle/fluid/distributed/ps/table/depends/dense.h +++ b/paddle/fluid/distributed/ps/table/depends/dense.h @@ -22,8 +22,8 @@ #include #include -#include "gflags/gflags.h" #include "paddle/fluid/distributed/common/utils.h" +#include "paddle/utils/flags.h" namespace paddle { namespace distributed { diff --git a/paddle/fluid/distributed/ps/table/depends/feature_value.h b/paddle/fluid/distributed/ps/table/depends/feature_value.h index c91502a8552..ce01c876225 100644 --- a/paddle/fluid/distributed/ps/table/depends/feature_value.h +++ 
b/paddle/fluid/distributed/ps/table/depends/feature_value.h @@ -14,11 +14,12 @@ #pragma once -#include #include -#include "gflags/gflags.h" +#include + #include "paddle/fluid/distributed/common/chunk_allocator.h" +#include "paddle/utils/flags.h" namespace paddle { namespace distributed { diff --git a/paddle/fluid/distributed/ps/table/depends/initializers.h b/paddle/fluid/distributed/ps/table/depends/initializers.h index 46722709767..3ebe5549de4 100644 --- a/paddle/fluid/distributed/ps/table/depends/initializers.h +++ b/paddle/fluid/distributed/ps/table/depends/initializers.h @@ -21,9 +21,9 @@ #include #include -#include "gflags/gflags.h" #include "paddle/fluid/operators/truncated_gaussian_random_op.h" #include "paddle/phi/core/generator.h" +#include "paddle/utils/flags.h" namespace paddle { namespace distributed { @@ -124,13 +124,13 @@ class TruncatedGaussianInitializer : public Initializer { } float GetValue() override { - paddle::operators::TruncatedNormal truncated_normal(mean_, std_); + ::paddle::operators::TruncatedNormal truncated_normal(mean_, std_); float value = truncated_normal(dist_(*random_engine_)); return value; } void GetValue(float *value, int numel) { - paddle::operators::TruncatedNormal truncated_normal(mean_, std_); + ::paddle::operators::TruncatedNormal truncated_normal(mean_, std_); for (int x = 0; x < numel; ++x) { value[x] = truncated_normal(dist_(*random_engine_)); } diff --git a/paddle/fluid/distributed/ps/table/graph/class_macro.h b/paddle/fluid/distributed/ps/table/graph/class_macro.h index bf59dbacb25..895e59d09af 100644 --- a/paddle/fluid/distributed/ps/table/graph/class_macro.h +++ b/paddle/fluid/distributed/ps/table/graph/class_macro.h @@ -36,4 +36,4 @@ #define DECLARE_11_FRIEND_CLASS(a, ...) \ DECLARE_GRAPH_FRIEND_CLASS(a) DECLARE_10_FRIEND_CLASS(__VA_ARGS__) #define REGISTER_GRAPH_FRIEND_CLASS(n, ...) 
\ - DECLARE_##n##_FRIEND_CLASS(__VA_ARGS__) + PD_DECLARE_##n##_FRIEND_CLASS(__VA_ARGS__) diff --git a/paddle/fluid/distributed/ps/table/memory_sparse_table.cc b/paddle/fluid/distributed/ps/table/memory_sparse_table.cc index 5ee23010b52..dbdff119141 100644 --- a/paddle/fluid/distributed/ps/table/memory_sparse_table.cc +++ b/paddle/fluid/distributed/ps/table/memory_sparse_table.cc @@ -26,16 +26,18 @@ // #include "boost/lexical_cast.hpp" #include "paddle/fluid/platform/enforce.h" -DEFINE_bool(pserver_print_missed_key_num_every_push, - false, - "pserver_print_missed_key_num_every_push"); -DEFINE_bool(pserver_create_value_when_push, - true, - "pserver create value when push"); -DEFINE_bool(pserver_enable_create_feasign_randomly, - false, - "pserver_enable_create_feasign_randomly"); -DEFINE_int32(pserver_table_save_max_retry, 3, "pserver_table_save_max_retry"); +PD_DEFINE_bool(pserver_print_missed_key_num_every_push, + false, + "pserver_print_missed_key_num_every_push"); +PD_DEFINE_bool(pserver_create_value_when_push, + true, + "pserver create value when push"); +PD_DEFINE_bool(pserver_enable_create_feasign_randomly, + false, + "pserver_enable_create_feasign_randomly"); +PD_DEFINE_int32(pserver_table_save_max_retry, + 3, + "pserver_table_save_max_retry"); namespace paddle { namespace distributed { @@ -333,7 +335,7 @@ int32_t MemorySparseTable::Save(const std::string &dirname, TopkCalculator tk(_real_local_shard_num, tk_size); std::string table_path = TableDir(dirname); - _afs_client.remove(paddle::string::format_string( + _afs_client.remove(::paddle::string::format_string( "%s/part-%03d-*", table_path.c_str(), _shard_idx)); std::atomic feasign_size_all{0}; @@ -350,15 +352,15 @@ int32_t MemorySparseTable::Save(const std::string &dirname, FsChannelConfig channel_config; if (_config.compress_in_save() && (save_param == 0 || save_param == 3)) { channel_config.path = - paddle::string::format_string("%s/part-%03d-%05d.gz", - table_path.c_str(), - _shard_idx, - file_start_idx + i); + ::paddle::string::format_string("%s/part-%03d-%05d.gz", + table_path.c_str(), + _shard_idx, + file_start_idx + i); } else { - channel_config.path = paddle::string::format_string("%s/part-%03d-%05d", - table_path.c_str(), - _shard_idx, - file_start_idx + i); + channel_config.path = ::paddle::string::format_string("%s/part-%03d-%05d", + table_path.c_str(), + _shard_idx, + file_start_idx + i); } channel_config.converter = _value_accesor->Converter(save_param).converter; channel_config.deconverter = @@ -385,7 +387,7 @@ int32_t MemorySparseTable::Save(const std::string &dirname, if (_value_accesor->Save(it.value().data(), save_param)) { std::string format_value = _value_accesor->ParseToString( it.value().data(), it.value().size()); - if (0 != write_channel->write_line(paddle::string::format_string( + if (0 != write_channel->write_line(::paddle::string::format_string( "%lu %s", it.key(), format_value.c_str()))) { ++retry_num; is_write_failed = true; @@ -432,7 +434,7 @@ int32_t MemorySparseTable::SavePatch(const std::string &path, int save_param) { } size_t file_start_idx = _m_avg_local_shard_num * _shard_idx; std::string table_path = TableDir(path); - _afs_client.remove(paddle::string::format_string( + _afs_client.remove(::paddle::string::format_string( "%s/part-%03d-*", table_path.c_str(), _shard_idx)); int thread_num = _m_real_local_shard_num < 20 ? 
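The memory_sparse_table.cc hunk above is the core macro migration: gflags' DEFINE_bool/DEFINE_int32 become PD_DEFINE_bool/PD_DEFINE_int32 from paddle/utils/flags.h, and the value is still read through a FLAGS_<name> variable elsewhere in the file. A minimal sketch of defining and consuming such a flag, assuming the gflags-compatible semantics this patch relies on (the flag name and retry helper below are hypothetical):

#include <functional>

#include "paddle/utils/flags.h"

PD_DEFINE_int32(demo_table_save_max_retry,
                3,
                "max retries when saving one table shard (demo flag)");

// Hypothetical caller: retry a shard save up to the flag's value.
bool SaveShardWithRetry(const std::function<bool()>& save_once) {
  for (int attempt = 0; attempt <= FLAGS_demo_table_save_max_retry; ++attempt) {
    if (save_once()) return true;
  }
  return false;
}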
_m_real_local_shard_num : 20; @@ -442,10 +444,10 @@ int32_t MemorySparseTable::SavePatch(const std::string &path, int save_param) { #pragma omp parallel for schedule(dynamic) for (int i = 0; i < _m_real_local_shard_num; ++i) { FsChannelConfig channel_config; - channel_config.path = paddle::string::format_string("%s/part-%03d-%05d", - table_path.c_str(), - _shard_idx, - file_start_idx + i); + channel_config.path = ::paddle::string::format_string("%s/part-%03d-%05d", + table_path.c_str(), + _shard_idx, + file_start_idx + i); channel_config.converter = _value_accesor->Converter(save_param).converter; channel_config.deconverter = @@ -469,8 +471,9 @@ int32_t MemorySparseTable::SavePatch(const std::string &path, int save_param) { if (_value_accesor->Save(it.value().data(), save_param)) { std::string format_value = _value_accesor->ParseToString( it.value().data(), it.value().size()); - if (0 != write_channel->write_line(paddle::string::format_string( - "%lu %s", it.key(), format_value.c_str()))) { + if (0 != + write_channel->write_line(::paddle::string::format_string( + "%lu %s", it.key(), format_value.c_str()))) { ++retry_num; is_write_failed = true; LOG(ERROR) << "MemorySparseTable save failed, retry it! path:" @@ -503,10 +506,10 @@ int32_t MemorySparseTable::SavePatch(const std::string &path, int save_param) { feasign_size_all += feasign_size; } LOG(INFO) << "MemorySparseTable save patch success, path:" - << paddle::string::format_string("%s/%03d/part-%03d-", - path.c_str(), - _config.table_id(), - _shard_idx) + << ::paddle::string::format_string("%s/%03d/part-%03d-", + path.c_str(), + _config.table_id(), + _shard_idx) << " from " << file_start_idx << " to " << file_start_idx + _m_real_local_shard_num - 1 << ", feasign size: " << feasign_size_all; @@ -519,7 +522,7 @@ int64_t MemorySparseTable::CacheShuffle( double cache_threshold, std::function( int msg_type, int to_pserver_id, std::string &msg)> send_msg_func, - paddle::framework::Channel> + ::paddle::framework::Channel> &shuffled_channel, const std::vector &table_ptrs) { LOG(INFO) << "cache shuffle with cache threshold: " << cache_threshold; @@ -536,24 +539,24 @@ int64_t MemorySparseTable::CacheShuffle( int thread_num = _real_local_shard_num < 20 ? 
_real_local_shard_num : 20; std::vector< - paddle::framework::ChannelWriter>> + ::paddle::framework::ChannelWriter>> writers(_real_local_shard_num); std::vector>> datas( _real_local_shard_num); int feasign_size = 0; - std::vector>> + std::vector<::paddle::framework::Channel>> tmp_channels; for (int i = 0; i < _real_local_shard_num; ++i) { tmp_channels.push_back( - paddle::framework::MakeChannel>()); + ::paddle::framework::MakeChannel>()); } omp_set_num_threads(thread_num); #pragma omp parallel for schedule(dynamic) for (int i = 0; i < _real_local_shard_num; ++i) { - paddle::framework::ChannelWriter> &writer = - writers[i]; + ::paddle::framework::ChannelWriter> + &writer = writers[i]; writer.Reset(tmp_channels[i].get()); for (auto table_ptr : table_ptrs) { @@ -579,15 +582,15 @@ int64_t MemorySparseTable::CacheShuffle( // shard num: " << _real_local_shard_num; std::vector> local_datas; for (int idx_shard = 0; idx_shard < _real_local_shard_num; ++idx_shard) { - paddle::framework::ChannelWriter> &writer = - writers[idx_shard]; + ::paddle::framework::ChannelWriter> + &writer = writers[idx_shard]; auto channel = writer.channel(); std::vector> &data = datas[idx_shard]; - std::vector ars(shuffle_node_num); + std::vector<::paddle::framework::BinaryArchive> ars(shuffle_node_num); while (channel->Read(data)) { for (auto &t : data) { auto pserver_id = - paddle::distributed::local_random_engine()() % shuffle_node_num; + ::paddle::distributed::local_random_engine()() % shuffle_node_num; if (pserver_id != _shard_idx) { ars[pserver_id] << t; } else { @@ -618,7 +621,7 @@ int64_t MemorySparseTable::CacheShuffle( t.wait(); } ars.clear(); - ars = std::vector(shuffle_node_num); + ars = std::vector<::paddle::framework::BinaryArchive>(shuffle_node_num); data = std::vector>(); } } @@ -629,20 +632,20 @@ int64_t MemorySparseTable::CacheShuffle( int32_t MemorySparseTable::SaveCache( const std::string &path, const std::string ¶m, - paddle::framework::Channel> + ::paddle::framework::Channel> &shuffled_channel) { if (_shard_idx >= _config.sparse_table_cache_file_num()) { return 0; } int save_param = atoi(param.c_str()); // batch_model:0 xbox:1 - std::string table_path = paddle::string::format_string( + std::string table_path = ::paddle::string::format_string( "%s/%03d_cache/", path.c_str(), _config.table_id()); - _afs_client.remove(paddle::string::format_string( + _afs_client.remove(::paddle::string::format_string( "%s/part-%03d", table_path.c_str(), _shard_idx)); uint32_t feasign_size = 0; FsChannelConfig channel_config; // not compress cache model - channel_config.path = paddle::string::format_string( + channel_config.path = ::paddle::string::format_string( "%s/part-%03d", table_path.c_str(), _shard_idx); channel_config.converter = _value_accesor->Converter(save_param).converter; channel_config.deconverter = @@ -654,7 +657,7 @@ int32_t MemorySparseTable::SaveCache( while (shuffled_channel->Read(data)) { for (auto &t : data) { ++feasign_size; - if (0 != write_channel->write_line(paddle::string::format_string( + if (0 != write_channel->write_line(::paddle::string::format_string( "%lu %s", t.first, t.second.c_str()))) { LOG(ERROR) << "Cache Table save failed, " "path:" diff --git a/paddle/fluid/distributed/ps/table/sparse_accessor.cc b/paddle/fluid/distributed/ps/table/sparse_accessor.cc index afa94703233..77460a8d17e 100644 --- a/paddle/fluid/distributed/ps/table/sparse_accessor.cc +++ b/paddle/fluid/distributed/ps/table/sparse_accessor.cc @@ -14,10 +14,9 @@ #include 
"paddle/fluid/distributed/ps/table/sparse_accessor.h" -#include - #include "glog/logging.h" #include "paddle/fluid/string/string_helper.h" +#include "paddle/utils/flags.h" namespace paddle { namespace distributed { @@ -300,7 +299,7 @@ std::string SparseAccessor::ParseToString(const float* v, int param) { int SparseAccessor::ParseFromString(const std::string& str, float* value) { _embedx_sgd_rule->InitValue(value + sparse_feature_value.EmbedxWIndex(), value + sparse_feature_value.EmbedxG2SumIndex()); - auto ret = paddle::string::str_to_float(str.data(), value); + auto ret = ::paddle::string::str_to_float(str.data(), value); CHECK(ret >= 6) << "expect more than 6 real:" << ret; return ret; } diff --git a/paddle/fluid/distributed/ps/table/sparse_sgd_rule.cc b/paddle/fluid/distributed/ps/table/sparse_sgd_rule.cc index 6ab4506d29e..0c66e9d407a 100644 --- a/paddle/fluid/distributed/ps/table/sparse_sgd_rule.cc +++ b/paddle/fluid/distributed/ps/table/sparse_sgd_rule.cc @@ -14,11 +14,11 @@ #include "paddle/fluid/distributed/ps/table/sparse_sgd_rule.h" -#include - #include "glog/logging.h" -DEFINE_bool(enable_show_scale_gradient, true, "enable show scale gradient"); +#include "paddle/utils/flags.h" + +PD_DEFINE_bool(enable_show_scale_gradient, true, "enable show scale gradient"); namespace paddle { namespace distributed { diff --git a/paddle/fluid/distributed/ps/table/ssd_sparse_table.cc b/paddle/fluid/distributed/ps/table/ssd_sparse_table.cc index 7d96e0f49d1..bb6de81cbb3 100644 --- a/paddle/fluid/distributed/ps/table/ssd_sparse_table.cc +++ b/paddle/fluid/distributed/ps/table/ssd_sparse_table.cc @@ -20,11 +20,11 @@ #include "paddle/fluid/framework/archive.h" #include "paddle/fluid/platform/flags.h" #include "paddle/utils/string/string_helper.h" -DECLARE_bool(pserver_print_missed_key_num_every_push); -DECLARE_bool(pserver_create_value_when_push); -DECLARE_bool(pserver_enable_create_feasign_randomly); -DEFINE_bool(pserver_open_strict_check, false, "pserver_open_strict_check"); -DEFINE_int32(pserver_load_batch_size, 5000, "load batch size for ssd"); +PD_DECLARE_bool(pserver_print_missed_key_num_every_push); +PD_DECLARE_bool(pserver_create_value_when_push); +PD_DECLARE_bool(pserver_enable_create_feasign_randomly); +PD_DEFINE_bool(pserver_open_strict_check, false, "pserver_open_strict_check"); +PD_DEFINE_int32(pserver_load_batch_size, 5000, "load batch size for ssd"); PADDLE_DEFINE_EXPORTED_string(rocksdb_path, "database", "path of sparse table rocksdb file"); @@ -34,7 +34,7 @@ namespace distributed { int32_t SSDSparseTable::Initialize() { MemorySparseTable::Initialize(); - _db = paddle::distributed::RocksDBHandler::GetInstance(); + _db = ::paddle::distributed::RocksDBHandler::GetInstance(); _db->initialize(FLAGS_rocksdb_path, _real_local_shard_num); VLOG(0) << "initalize SSDSparseTable succ"; VLOG(0) << "SSD FLAGS_pserver_print_missed_key_num_every_push:" @@ -135,7 +135,7 @@ int32_t SSDSparseTable::PullSparse(float* pull_values, } else { data_size = tmp_string.size() / sizeof(float); memcpy(data_buffer_ptr, - paddle::string::str_to_float(tmp_string), + ::paddle::string::str_to_float(tmp_string), data_size * sizeof(float)); // from rocksdb to mem auto& feature_value = local_shard[key]; @@ -239,7 +239,7 @@ int32_t SSDSparseTable::PullSparsePtr(int shard_id, auto& feature_value = local_shard[cur_key]; feature_value.resize(data_size); memcpy(const_cast(feature_value.data()), - paddle::string::str_to_float( + ::paddle::string::str_to_float( cur_ctx->batch_values[idx].data()), data_size * sizeof(float)); 
_db->del_data(shard_id, @@ -302,7 +302,7 @@ int32_t SSDSparseTable::PullSparsePtr(int shard_id, feature_value.resize(data_size); memcpy( const_cast(feature_value.data()), - paddle::string::str_to_float(cur_ctx->batch_values[idx].data()), + ::paddle::string::str_to_float(cur_ctx->batch_values[idx].data()), data_size * sizeof(float)); _db->del_data( shard_id, reinterpret_cast(&cur_key), sizeof(uint64_t)); @@ -529,7 +529,7 @@ int32_t SSDSparseTable::Shrink(const std::string& param) { auto* it = _db->get_iterator(i); for (it->SeekToFirst(); it->Valid(); it->Next()) { if (_value_accesor->Shrink( - paddle::string::str_to_float(it->value().data()))) { + ::paddle::string::str_to_float(it->value().data()))) { _db->del_data(i, it->key().data(), it->key().size()); ssd_count++; } else { @@ -627,7 +627,7 @@ int32_t SSDSparseTable::SaveWithString(const std::string& path, VLOG(0) << "TopkCalculator top n:" << _cache_tk_size; size_t file_start_idx = _avg_local_shard_num * _shard_idx; std::string table_path = TableDir(path); - _afs_client.remove(paddle::string::format_string( + _afs_client.remove(::paddle::string::format_string( "%s/part-%03d-*", table_path.c_str(), _shard_idx)); #ifdef PADDLE_WITH_GPU_GRAPH int thread_num = _real_local_shard_num; @@ -640,12 +640,11 @@ int32_t SSDSparseTable::SaveWithString(const std::string& path, // feasign_size = 0; std::vector< - paddle::framework::Channel>>> + ::paddle::framework::Channel>>> fs_channel; for (int i = 0; i < _real_local_shard_num; i++) { - fs_channel.push_back( - paddle::framework::MakeChannel>>( - 10240)); + fs_channel.push_back(::paddle::framework::MakeChannel< + std::pair>>(10240)); } std::vector threads; threads.resize(_real_local_shard_num); @@ -659,29 +658,29 @@ int32_t SSDSparseTable::SaveWithString(const std::string& path, FsChannelConfig channel_config; if (_config.compress_in_save() && (save_param == 0 || save_param == 3)) { channel_config.path = - paddle::string::format_string("%s/part-%03d-%05d.gz", - table_path.c_str(), - _shard_idx, - file_start_idx + file_num); + ::paddle::string::format_string("%s/part-%03d-%05d.gz", + table_path.c_str(), + _shard_idx, + file_start_idx + file_num); } else { channel_config.path = - paddle::string::format_string("%s/part-%03d-%05d", - table_path.c_str(), - _shard_idx, - file_start_idx + file_num); + ::paddle::string::format_string("%s/part-%03d-%05d", + table_path.c_str(), + _shard_idx, + file_start_idx + file_num); } channel_config.converter = _value_accesor->Converter(save_param).converter; channel_config.deconverter = _value_accesor->Converter(save_param).deconverter; auto write_channel = _afs_client.open_w(channel_config, 1024 * 1024 * 40, &err_no); - paddle::framework::ChannelReader>> + ::paddle::framework::ChannelReader>> reader(fs_channel[file_num].get()); std::pair> out_str; while (reader >> out_str) { std::string format_value = _value_accesor->ParseToString( out_str.second.data(), out_str.second.size()); - if (0 != write_channel->write_line(paddle::string::format_string( + if (0 != write_channel->write_line(::paddle::string::format_string( "%lu %s", out_str.first, format_value.c_str()))) { LOG(FATAL) << "SSDSparseTable save failed, retry it! 
path:" << channel_config.path; @@ -693,8 +692,8 @@ int32_t SSDSparseTable::SaveWithString(const std::string& path, threads[i] = std::thread(save_func, i); } - std::vector< - paddle::framework::ChannelWriter>>> + std::vector<::paddle::framework::ChannelWriter< + std::pair>>> writers(_real_local_shard_num); omp_set_num_threads(thread_num); #pragma omp parallel for schedule(dynamic) @@ -726,14 +725,14 @@ int32_t SSDSparseTable::SaveWithString(const std::string& path, auto* it = _db->get_iterator(i); for (it->SeekToFirst(); it->Valid(); it->Next()) { bool need_save = _value_accesor->Save( - paddle::string::str_to_float(it->value().data()), save_param); + ::paddle::string::str_to_float(it->value().data()), save_param); _value_accesor->UpdateStatAfterSave( - paddle::string::str_to_float(it->value().data()), save_param); + ::paddle::string::str_to_float(it->value().data()), save_param); if (need_save) { std::vector feature_value; feature_value.resize(it->value().size() / sizeof(float)); memcpy(const_cast(feature_value.data()), - paddle::string::str_to_float(it->value().data()), + ::paddle::string::str_to_float(it->value().data()), it->value().size()); writer << std::make_pair(*(reinterpret_cast( const_cast(it->key().data()))), @@ -766,10 +765,10 @@ int32_t SSDSparseTable::SaveWithString(const std::string& path, } VLOG(0) << "SSDSparseTable save success, feasign size:" << feasign_size_all << ", path:" - << paddle::string::format_string("%s/%03d/part-%03d-", - path.c_str(), - _config.table_id(), - _shard_idx) + << ::paddle::string::format_string("%s/%03d/part-%03d-", + path.c_str(), + _config.table_id(), + _shard_idx) << " from " << file_start_idx << " to " << file_start_idx + _real_local_shard_num - 1; _local_show_threshold = tk.top(); @@ -800,7 +799,7 @@ int32_t SSDSparseTable::SaveWithStringMultiOutput(const std::string& path, VLOG(0) << "TopkCalculator top n:" << _cache_tk_size; size_t file_start_idx = _avg_local_shard_num * _shard_idx; std::string table_path = TableDir(path); - _afs_client.remove(paddle::string::format_string( + _afs_client.remove(::paddle::string::format_string( "%s/part-%03d-*", table_path.c_str(), _shard_idx)); #ifdef PADDLE_WITH_GPU_GRAPH int thread_num = _real_local_shard_num; @@ -809,17 +808,17 @@ int32_t SSDSparseTable::SaveWithStringMultiOutput(const std::string& path, #endif std::atomic feasign_size_all{0}; - std::vector>> + std::vector<::paddle::framework::Channel>> busy_channel; - std::vector>> + std::vector<::paddle::framework::Channel>> free_channel; std::vector threads; for (int i = 0; i < _real_local_shard_num; i++) { busy_channel.push_back( - paddle::framework::MakeChannel>()); + ::paddle::framework::MakeChannel>()); free_channel.push_back( - paddle::framework::MakeChannel>()); + ::paddle::framework::MakeChannel>()); } threads.resize(_real_local_shard_num); @@ -848,14 +847,14 @@ int32_t SSDSparseTable::SaveWithStringMultiOutput(const std::string& path, int split_num) { if (compress && (save_param == 0 || save_param == 3)) { // return - // paddle::string::format_string("%s/part-%03d-%05d-%03d-%03d.gz", + // ::paddle::string::format_string("%s/part-%03d-%05d-%03d-%03d.gz", // table_path, node_num, shard_num, part_num, split_num); - return paddle::string::format_string( + return ::paddle::string::format_string( "%s/part-%05d-%03d.gz", table_path, shard_num, split_num); } else { - // return paddle::string::format_string("%s/part-%03d-%05d-%03d-%03d", + // return ::paddle::string::format_string("%s/part-%03d-%05d-%03d-%03d", // table_path, node_num, shard_num, 
part_num, split_num); - return paddle::string::format_string( + return ::paddle::string::format_string( "%s/part-%05d-%03d", table_path, shard_num, split_num); } }; @@ -899,7 +898,7 @@ int32_t SSDSparseTable::SaveWithStringMultiOutput(const std::string& path, int dim = len / sizeof(float); std::string format_value = _value_accesor->ParseToString(value, dim); - if (0 != write_channel->write_line(paddle::string::format_string( + if (0 != write_channel->write_line(::paddle::string::format_string( "%lu %s", k, format_value.c_str()))) { VLOG(0) << "SSDSparseTable save failed, retry it! path:" << channel_config.path; @@ -985,9 +984,9 @@ int32_t SSDSparseTable::SaveWithStringMultiOutput(const std::string& path, auto* it = _db->get_iterator(i); for (it->SeekToFirst(); it->Valid(); it->Next()) { bool need_save = _value_accesor->Save( - paddle::string::str_to_float(it->value().data()), save_param); + ::paddle::string::str_to_float(it->value().data()), save_param); _value_accesor->UpdateStatAfterSave( - paddle::string::str_to_float(it->value().data()), save_param); + ::paddle::string::str_to_float(it->value().data()), save_param); if (need_save) { uint32_t len = sizeof(uint64_t) + it->value().size() + sizeof(uint32_t); @@ -1052,10 +1051,10 @@ int32_t SSDSparseTable::SaveWithStringMultiOutput(const std::string& path, } VLOG(0) << "DownpourSparseSSDTable save success, feasign size:" << feasign_size_all << " ,path:" - << paddle::string::format_string("%s/%03d/part-%03d-", - path.c_str(), - _config.table_id(), - _shard_idx) + << ::paddle::string::format_string("%s/%03d/part-%03d-", + path.c_str(), + _config.table_id(), + _shard_idx) << " from " << file_start_idx << " to " << file_start_idx + _real_local_shard_num - 1; if (_config.enable_sparse_table_cache()) { @@ -1085,7 +1084,7 @@ int32_t SSDSparseTable::SaveWithBinary(const std::string& path, VLOG(0) << "TopkCalculator top n:" << _cache_tk_size; size_t file_start_idx = _avg_local_shard_num * _shard_idx; std::string table_path = TableDir(path); - _afs_client.remove(paddle::string::format_string( + _afs_client.remove(::paddle::string::format_string( "%s/part-%03d-*", table_path.c_str(), _shard_idx)); #ifdef PADDLE_WITH_GPU_GRAPH int thread_num = _real_local_shard_num; @@ -1094,17 +1093,17 @@ int32_t SSDSparseTable::SaveWithBinary(const std::string& path, #endif std::atomic feasign_size_all{0}; - std::vector>> + std::vector<::paddle::framework::Channel>> busy_channel; - std::vector>> + std::vector<::paddle::framework::Channel>> free_channel; std::vector threads; for (int i = 0; i < _real_local_shard_num; i++) { busy_channel.push_back( - paddle::framework::MakeChannel>()); + ::paddle::framework::MakeChannel>()); free_channel.push_back( - paddle::framework::MakeChannel>()); + ::paddle::framework::MakeChannel>()); } threads.resize(_real_local_shard_num); @@ -1132,19 +1131,19 @@ int32_t SSDSparseTable::SaveWithBinary(const std::string& path, int part_num, int split_num) { if (compress && (save_param == 0 || save_param == 3)) { - return paddle::string::format_string("%s/part-%03d-%05d-%03d-%03d.gz", - table_path, - node_num, - shard_num, - part_num, - split_num); + return ::paddle::string::format_string("%s/part-%03d-%05d-%03d-%03d.gz", + table_path, + node_num, + shard_num, + part_num, + split_num); } else { - return paddle::string::format_string("%s/part-%03d-%05d-%03d-%03d", - table_path, - node_num, - shard_num, - part_num, - split_num); + return ::paddle::string::format_string("%s/part-%03d-%05d-%03d-%03d", + table_path, + node_num, + shard_num, + 
part_num, + split_num); } }; std::shared_ptr region = nullptr; @@ -1206,7 +1205,7 @@ int32_t SSDSparseTable::SaveWithBinary(const std::string& path, int dim = len / sizeof(float); std::string format_value = _value_accesor->ParseToString(value, dim); - if (0 != write_channel->write_line(paddle::string::format_string( + if (0 != write_channel->write_line(::paddle::string::format_string( "%lu %s", k, format_value.c_str()))) { LOG(FATAL) << "SSDSparseTable save failed, retry it! path:" << channel_config.path; @@ -1277,9 +1276,9 @@ int32_t SSDSparseTable::SaveWithBinary(const std::string& path, auto* it = _db->get_iterator(i); for (it->SeekToFirst(); it->Valid(); it->Next()) { bool need_save = _value_accesor->Save( - paddle::string::str_to_float(it->value().data()), save_param); + ::paddle::string::str_to_float(it->value().data()), save_param); _value_accesor->UpdateStatAfterSave( - paddle::string::str_to_float(it->value().data()), save_param); + ::paddle::string::str_to_float(it->value().data()), save_param); if (need_save) { uint32_t len = sizeof(uint64_t) + it->value().size() + sizeof(uint32_t); @@ -1344,10 +1343,10 @@ int32_t SSDSparseTable::SaveWithBinary(const std::string& path, } VLOG(0) << "DownpourSparseSSDTable save success, feasign size:" << feasign_size_all << " ,path:" - << paddle::string::format_string("%s/%03d/part-%03d-", - path.c_str(), - _config.table_id(), - _shard_idx) + << ::paddle::string::format_string("%s/%03d/part-%03d-", + path.c_str(), + _config.table_id(), + _shard_idx) << " from " << file_start_idx << " to " << file_start_idx + _real_local_shard_num - 1; if (_config.enable_sparse_table_cache()) { @@ -1364,7 +1363,7 @@ int64_t SSDSparseTable::CacheShuffle( double cache_threshold, std::function( int msg_type, int to_pserver_id, std::string& msg)> send_msg_func, - paddle::framework::Channel>& + ::paddle::framework::Channel>& shuffled_channel, const std::vector& table_ptrs) { LOG(INFO) << "cache shuffle with cache threshold: " << cache_threshold @@ -1381,27 +1380,27 @@ int64_t SSDSparseTable::CacheShuffle( int thread_num = _real_local_shard_num < 20 ? 
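The part-file names built throughout these Save/Load routines come from ::paddle::string::format_string, used here as a printf-style formatter that returns a std::string. A self-contained sketch with a local stand-in so the snippet compiles on its own (the real helper lives in Paddle's string utilities):

#include <cstdarg>
#include <cstdio>
#include <string>

// Local stand-in mirroring the call shape of ::paddle::string::format_string.
std::string FormatString(const char* fmt, ...) {
  char buf[512];
  va_list ap;
  va_start(ap, fmt);
  vsnprintf(buf, sizeof(buf), fmt, ap);
  va_end(ap);
  return std::string(buf);
}

int main() {
  // Shard 3, split 17 -> "/tmp/table/part-00003-017", matching the
  // "%s/part-%05d-%03d" pattern used by the multi-output save above.
  std::string path = FormatString("%s/part-%05d-%03d", "/tmp/table", 3, 17);
  std::printf("%s\n", path.c_str());
  return 0;
}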
_real_local_shard_num : 20; std::vector< - paddle::framework::ChannelWriter>> + ::paddle::framework::ChannelWriter>> writers(_real_local_shard_num); std::vector>> datas( _real_local_shard_num); int feasign_size = 0; - std::vector>> + std::vector<::paddle::framework::Channel>> tmp_channels; for (int i = 0; i < _real_local_shard_num; ++i) { tmp_channels.push_back( - paddle::framework::MakeChannel>()); + ::paddle::framework::MakeChannel>()); } omp_set_num_threads(thread_num); #pragma omp parallel for schedule(dynamic) for (int i = 0; i < _real_local_shard_num; ++i) { - paddle::framework::ChannelWriter>& writer = - writers[i]; - // std::shared_ptr>& + writer = writers[i]; + // std::shared_ptr<::paddle::framework::ChannelObject>> tmp_chan = - // paddle::framework::MakeChannel>(); writer.Reset(tmp_channels[i].get()); @@ -1426,15 +1425,15 @@ int64_t SSDSparseTable::CacheShuffle( << _real_local_shard_num; std::vector> local_datas; for (int idx_shard = 0; idx_shard < _real_local_shard_num; ++idx_shard) { - paddle::framework::ChannelWriter>& writer = - writers[idx_shard]; + ::paddle::framework::ChannelWriter>& + writer = writers[idx_shard]; auto channel = writer.channel(); std::vector>& data = datas[idx_shard]; - std::vector ars(shuffle_node_num); + std::vector<::paddle::framework::BinaryArchive> ars(shuffle_node_num); while (channel->Read(data)) { for (auto& t : data) { auto pserver_id = - paddle::distributed::local_random_engine()() % shuffle_node_num; + ::paddle::distributed::local_random_engine()() % shuffle_node_num; if (pserver_id != _shard_idx) { ars[pserver_id] << t; } else { @@ -1465,7 +1464,7 @@ int64_t SSDSparseTable::CacheShuffle( t.wait(); } ars.clear(); - ars = std::vector(shuffle_node_num); + ars = std::vector<::paddle::framework::BinaryArchive>(shuffle_node_num); data = std::vector>(); } } @@ -1477,20 +1476,20 @@ int64_t SSDSparseTable::CacheShuffle( int32_t SSDSparseTable::SaveCache( const std::string& path, const std::string& param, - paddle::framework::Channel>& + ::paddle::framework::Channel>& shuffled_channel) { if (_shard_idx >= _config.sparse_table_cache_file_num()) { return 0; } int save_param = atoi(param.c_str()); // batch_model:0 xbox:1 - std::string table_path = paddle::string::format_string( + std::string table_path = ::paddle::string::format_string( "%s/%03d_cache/", path.c_str(), _config.table_id()); - _afs_client.remove(paddle::string::format_string( + _afs_client.remove(::paddle::string::format_string( "%s/part-%03d", table_path.c_str(), _shard_idx)); uint32_t feasign_size = 0; FsChannelConfig channel_config; // not compress cache model - channel_config.path = paddle::string::format_string( + channel_config.path = ::paddle::string::format_string( "%s/part-%03d", table_path.c_str(), _shard_idx); channel_config.converter = _value_accesor->Converter(save_param).converter; channel_config.deconverter = @@ -1502,7 +1501,7 @@ int32_t SSDSparseTable::SaveCache( while (shuffled_channel->Read(data)) { for (auto& t : data) { ++feasign_size; - if (0 != write_channel->write_line(paddle::string::format_string( + if (0 != write_channel->write_line(::paddle::string::format_string( "%lu %s", t.first, t.second.c_str()))) { LOG(ERROR) << "Cache Table save failed, " "path:" @@ -1580,7 +1579,7 @@ int32_t SSDSparseTable::LoadWithString( #endif for (int i = 0; i < _real_local_shard_num; i++) { - _fs_channel.push_back(paddle::framework::MakeChannel(30000)); + _fs_channel.push_back(::paddle::framework::MakeChannel(30000)); } std::vector threads; @@ -1598,7 +1597,7 @@ int32_t 
SSDSparseTable::LoadWithString( std::string line_data; auto read_channel = _afs_client.open_r(channel_config, 0, &err_no); - paddle::framework::ChannelWriter writer( + ::paddle::framework::ChannelWriter writer( _fs_channel[file_num].get()); while (read_channel->read_line(line_data) == 0 && line_data.size() > 1) { writer << line_data; @@ -1638,7 +1637,8 @@ int32_t SSDSparseTable::LoadWithString( uint64_t filter_time = 0; uint64_t filter_begin = 0; - paddle::framework::ChannelReader reader(_fs_channel[i].get()); + ::paddle::framework::ChannelReader reader( + _fs_channel[i].get()); while (reader >> line_data) { uint64_t key = std::strtoul(line_data.data(), &end, 10); @@ -1724,8 +1724,8 @@ int32_t SSDSparseTable::LoadWithBinary(const std::string& path, int param) { _value_accesor->GetAccessorInfo().mf_size / sizeof(float); // task pool _file_num_one_shard default 7 auto task_pool = std::make_shared<::ThreadPool>(_real_local_shard_num * 7); - auto filelists = _afs_client.list( - paddle::string::format_string("%s/part-%03d*", path.c_str(), _shard_idx)); + auto filelists = _afs_client.list(::paddle::string::format_string( + "%s/part-%03d*", path.c_str(), _shard_idx)); // #pragma omp parallel for schedule(dynamic) std::vector> tasks; @@ -1736,7 +1736,7 @@ int32_t SSDSparseTable::LoadWithBinary(const std::string& path, int param) { // _value_accesor->Converter(param).deconverter; for (auto& filename : filelists) { std::vector split_filename_string = - paddle::string::split_string(filename, "-"); + ::paddle::string::split_string(filename, "-"); int file_split_idx = atoi(split_filename_string[split_filename_string.size() - 1].c_str()); int file_shard_idx = @@ -1798,10 +1798,10 @@ int32_t SSDSparseTable::LoadWithBinary(const std::string& path, int param) { int use_sst = 0; if (file_split_idx != 0) { std::string path = - paddle::string::format_string("%s_%d/part-%03d.sst", - FLAGS_rocksdb_path.c_str(), - shard_idx, - file_split_idx); + ::paddle::string::format_string("%s_%d/part-%03d.sst", + FLAGS_rocksdb_path.c_str(), + shard_idx, + file_split_idx); rocksdb::Status status = sst_writer.Open(path); if (!status.ok()) { VLOG(0) << "sst writer open " << path << "failed"; @@ -1925,7 +1925,7 @@ int32_t SSDSparseTable::LoadWithBinary(const std::string& path, int param) { } tasks.clear(); for (int shard_idx = 0; shard_idx < _real_local_shard_num; shard_idx++) { - auto sst_filelist = _afs_client.list(paddle::string::format_string( + auto sst_filelist = _afs_client.list(::paddle::string::format_string( "%s_%d/part-*", FLAGS_rocksdb_path.c_str(), shard_idx)); if (!sst_filelist.empty()) { int ret = _db->ingest_externel_file(shard_idx, sst_filelist); @@ -2034,10 +2034,10 @@ int32_t SSDSparseTable::CacheTable(uint16_t pass_id) { if (!datas.empty()) { rocksdb::SstFileWriter sst_writer(rocksdb::EnvOptions(), options); std::string filename = - paddle::string::format_string("%s_%d/cache-%05d.sst", - FLAGS_rocksdb_path.c_str(), - shard_id, - cache_table_count); + ::paddle::string::format_string("%s_%d/cache-%05d.sst", + FLAGS_rocksdb_path.c_str(), + shard_id, + cache_table_count); rocksdb::Status status = sst_writer.Open(filename); if (!status.ok()) { VLOG(0) << "sst writer open " << filename << "failed" diff --git a/paddle/fluid/distributed/ps/table/ssd_sparse_table.h b/paddle/fluid/distributed/ps/table/ssd_sparse_table.h index e5561c5e42b..c003061d815 100644 --- a/paddle/fluid/distributed/ps/table/ssd_sparse_table.h +++ b/paddle/fluid/distributed/ps/table/ssd_sparse_table.h @@ -14,9 +14,9 @@ #pragma once -#include 
"gflags/gflags.h" #include "paddle/fluid/distributed/ps/table/depends/rocksdb_warpper.h" #include "paddle/fluid/distributed/ps/table/memory_sparse_table.h" +#include "paddle/utils/flags.h" namespace paddle { namespace distributed { diff --git a/paddle/fluid/distributed/ps/table/table.h b/paddle/fluid/distributed/ps/table/table.h index b64e05e3b0a..dc44831e891 100644 --- a/paddle/fluid/distributed/ps/table/table.h +++ b/paddle/fluid/distributed/ps/table/table.h @@ -118,7 +118,7 @@ class Table { virtual int32_t SaveCache( const std::string &path UNUSED, const std::string ¶m UNUSED, - paddle::framework::Channel> + ::paddle::framework::Channel> &shuffled_channel UNUSED) { return 0; } @@ -130,7 +130,7 @@ class Table { std::function( int msg_type, int to_pserver_id, std::string &msg)> // NOLINT send_msg_func UNUSED, - paddle::framework::Channel> + ::paddle::framework::Channel> &shuffled_channel UNUSED, const std::vector
&table_ptrs UNUSED) { return 0; @@ -161,7 +161,7 @@ class Table { virtual int32_t InitializeAccessor(); virtual int32_t InitializeShard() = 0; virtual std::string TableDir(const std::string &model_dir) { - return paddle::string::format_string( + return ::paddle::string::format_string( "%s/%03d/", model_dir.c_str(), _config.table_id()); } diff --git a/paddle/fluid/distributed/ps/wrapper/fleet.cc b/paddle/fluid/distributed/ps/wrapper/fleet.cc index d25ad89d504..87eb250545a 100644 --- a/paddle/fluid/distributed/ps/wrapper/fleet.cc +++ b/paddle/fluid/distributed/ps/wrapper/fleet.cc @@ -30,8 +30,10 @@ const uint32_t MAX_FEASIGN_NUM = 1024 * 100 * 100; std::shared_ptr FleetWrapper::s_instance_ = NULL; bool FleetWrapper::is_initialized_ = false; -std::shared_ptr FleetWrapper::pserver_ptr_ = NULL; -std::shared_ptr FleetWrapper::worker_ptr_ = NULL; +std::shared_ptr<::paddle::distributed::PSCore> FleetWrapper::pserver_ptr_ = + NULL; +std::shared_ptr<::paddle::distributed::PSClient> FleetWrapper::worker_ptr_ = + NULL; int FleetWrapper::RegisterHeterCallback(HeterCallBackFunc handler) { VLOG(0) << "RegisterHeterCallback support later"; @@ -76,8 +78,8 @@ void FleetWrapper::InitServer( const std::vector& server_sub_program) { if (!is_initialized_) { VLOG(3) << "Going to init server"; - pserver_ptr_ = std::shared_ptr( - new paddle::distributed::PSCore()); + pserver_ptr_ = std::shared_ptr<::paddle::distributed::PSCore>( + new ::paddle::distributed::PSCore()); pserver_ptr_->InitServer(dist_desc, &host_sign_list, host_sign_list.size(), @@ -92,7 +94,7 @@ void FleetWrapper::InitServer( void FleetWrapper::InitGFlag(const std::string& gflags) { VLOG(3) << "Init With Gflags:" << gflags; - std::vector flags = paddle::string::split_string(gflags); + std::vector flags = ::paddle::string::split_string(gflags); if (flags.empty()) { flags.push_back("-max_body_size=314217728"); flags.push_back("-bthread_concurrency=40"); @@ -107,7 +109,7 @@ void FleetWrapper::InitGFlag(const std::string& gflags) { } int params_cnt = flags.size(); char** params_ptr = &(flags_ptr[0]); - ::GFLAGS_NAMESPACE::ParseCommandLineFlags(¶ms_cnt, ¶ms_ptr, true); + ::paddle::flags::ParseCommandLineFlags(¶ms_cnt, ¶ms_ptr); } void FleetWrapper::InitWorker(const std::string& dist_desc, @@ -116,17 +118,17 @@ void FleetWrapper::InitWorker(const std::string& dist_desc, if (!is_initialized_) { // not used, just for psclient's init // TODO(zhaocaibei123): remove this later - std::map> + std::map> dense_pull_regions; if (worker_ptr_.get() == nullptr) { - paddle::distributed::PSParameter ps_param; + ::paddle::distributed::PSParameter ps_param; google::protobuf::TextFormat::ParseFromString(dist_desc, &ps_param); InitGFlag(ps_param.init_gflags()); int servers = host_sign_list.size(); ps_env_.SetPsServers(&host_sign_list, servers); - worker_ptr_ = std::shared_ptr( - paddle::distributed::PSClientFactory::Create(ps_param)); + worker_ptr_ = std::shared_ptr<::paddle::distributed::PSClient>( + ::paddle::distributed::PSClientFactory::Create(ps_param)); worker_ptr_->Configure(ps_param, dense_pull_regions, ps_env_, index); } dist_desc_ = dist_desc; @@ -392,7 +394,7 @@ void FleetWrapper::PullDenseVarsAsync( Variable* var = scope.FindVar(varname); phi::DenseTensor* tensor = var->GetMutable(); float* w = tensor->data(); - paddle::distributed::Region reg(w, tensor->numel()); + ::paddle::distributed::Region reg(w, tensor->numel()); regions[i] = std::move(reg); } @@ -412,7 +414,7 @@ void FleetWrapper::PullDenseVarsSync( phi::DenseTensor* tensor = var->GetMutable(); if 
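FleetWrapper::InitGFlag above switches from ::GFLAGS_NAMESPACE::ParseCommandLineFlags(&cnt, &ptr, true) to ::paddle::flags::ParseCommandLineFlags(&cnt, &ptr): the new entry point still takes argc/argv by pointer but has no gflags-style remove_flags argument. A sketch of pushing a vector of flag strings through it (helper and demo flag are hypothetical; a placeholder argv[0] is prepended on the assumption that, like conventional parsers, it skips the program name):

#include <string>
#include <vector>

#include "paddle/utils/flags.h"

PD_DEFINE_int32(demo_bthread_concurrency, 40, "demo flag");

void ParseFlagStrings(std::vector<std::string> flags) {
  flags.insert(flags.begin(), "dummy");  // placeholder argv[0]
  std::vector<char*> argv;
  argv.reserve(flags.size());
  for (auto& f : flags) argv.push_back(&f[0]);
  int argc = static_cast<int>(argv.size());
  char** argv_data = argv.data();
  ::paddle::flags::ParseCommandLineFlags(&argc, &argv_data);
}

// Usage: ParseFlagStrings({"-demo_bthread_concurrency=8"}); afterwards
// FLAGS_demo_bthread_concurrency reads 8.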
(!platform::is_gpu_place(tensor->place())) { float* w = tensor->data(); - paddle::distributed::Region reg(w, tensor->numel()); + ::paddle::distributed::Region reg(w, tensor->numel()); regions.emplace_back(std::move(reg)); } } @@ -425,14 +427,14 @@ void FleetWrapper::PushDenseParamSync( const uint64_t table_id, const std::vector& var_names) { auto place = platform::CPUPlace(); - std::vector regions; + std::vector<::paddle::distributed::Region> regions; for (auto& t : var_names) { Variable* var = scope.FindVar(t); CHECK(var != nullptr) << "var[" << t << "] not found"; phi::DenseTensor* tensor = var->GetMutable(); if (!platform::is_gpu_place(tensor->place())) { float* g = tensor->mutable_data(place); - paddle::distributed::Region reg(g, tensor->numel()); + ::paddle::distributed::Region reg(g, tensor->numel()); regions.emplace_back(std::move(reg)); } } @@ -456,7 +458,7 @@ void FleetWrapper::PushDenseVarsAsync( float scale_datanorm, int batch_size) { auto place = platform::CPUPlace(); - std::vector regions; + std::vector<::paddle::distributed::Region> regions; for (auto& t : var_names) { Variable* var = scope.FindVar(t); CHECK(var != nullptr) << "var[" << t << "] not found"; @@ -479,7 +481,7 @@ void FleetWrapper::PushDenseVarsAsync( } } - paddle::distributed::Region reg(g, tensor->numel()); + ::paddle::distributed::Region reg(g, tensor->numel()); regions.emplace_back(std::move(reg)); VLOG(3) << "FleetWrapper::PushDenseVarsAsync Var " << t << " talbe_id " << table_id << " Temp_data[0] " << g[0] << " Temp_data[-1] " @@ -774,7 +776,7 @@ void FleetWrapper::ShrinkDenseTable(int table_id, std::vector var_list, float decay, int emb_dim) { - std::vector regions; + std::vector<::paddle::distributed::Region> regions; for (std::string& name : var_list) { if (name.find("batch_sum") != std::string::npos) { Variable* var = scope->FindVar(name); @@ -795,14 +797,14 @@ void FleetWrapper::ShrinkDenseTable(int table_id, for (int k = 0; k < tensor->numel(); k += emb_dim) { g[k] = g[k] + g_size[k] * log(decay); } - paddle::distributed::Region reg(g, tensor->numel()); + ::paddle::distributed::Region reg(g, tensor->numel()); regions.emplace_back(std::move(reg)); } else { Variable* var = scope->FindVar(name); CHECK(var != nullptr) << "var[" << name << "] not found"; phi::DenseTensor* tensor = var->GetMutable(); float* g = tensor->data(); - paddle::distributed::Region reg(g, tensor->numel()); + ::paddle::distributed::Region reg(g, tensor->numel()); regions.emplace_back(std::move(reg)); } } diff --git a/paddle/fluid/distributed/ps/wrapper/fleet.h b/paddle/fluid/distributed/ps/wrapper/fleet.h index 9bf6f3c84a9..22dc0f1af72 100644 --- a/paddle/fluid/distributed/ps/wrapper/fleet.h +++ b/paddle/fluid/distributed/ps/wrapper/fleet.h @@ -295,7 +295,7 @@ class FleetWrapper { // FleetWrapper singleton static std::shared_ptr GetInstance() { if (NULL == s_instance_) { - s_instance_.reset(new paddle::distributed::FleetWrapper()); + s_instance_.reset(new ::paddle::distributed::FleetWrapper()); } return s_instance_; } @@ -322,13 +322,13 @@ class FleetWrapper { std::string PullFlStrategy(); //********** - static std::shared_ptr pserver_ptr_; - static std::shared_ptr worker_ptr_; + static std::shared_ptr<::paddle::distributed::PSCore> pserver_ptr_; + static std::shared_ptr<::paddle::distributed::PSClient> worker_ptr_; private: static std::shared_ptr s_instance_; std::string dist_desc_; - paddle::distributed::PaddlePSEnvironment ps_env_; + ::paddle::distributed::PaddlePSEnvironment ps_env_; size_t GetAbsoluteSum(size_t start, size_t 
end, size_t level, @@ -336,7 +336,7 @@ class FleetWrapper { protected: static bool is_initialized_; - std::map> regions_; + std::map> regions_; bool scale_sparse_gradient_with_batch_size_; int32_t sleep_seconds_before_fail_exit_; int client2client_request_timeout_ms_; diff --git a/paddle/fluid/distributed/rpc/CMakeLists.txt b/paddle/fluid/distributed/rpc/CMakeLists.txt index 76c6dc00110..4042a6fe3cc 100644 --- a/paddle/fluid/distributed/rpc/CMakeLists.txt +++ b/paddle/fluid/distributed/rpc/CMakeLists.txt @@ -12,17 +12,7 @@ set_source_files_properties( set_source_files_properties(rpc_agent.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -set(PADDLE_RPC_DEPS - brpc - ssl - crypto - protobuf - zlib - leveldb - snappy - phi - glog - pybind) +set(PADDLE_RPC_DEPS ${EXTERNAL_BRPC_DEPS} zlib phi pybind) proto_library(paddle_rpc_proto SRCS rpc.proto) cc_library( paddle_rpc diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index 182c83cc37b..055163ed620 100755 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -228,13 +228,13 @@ cc_test( set(BRPC_DEPS "") if(WITH_PSCORE) - set(BRPC_DEPS brpc ssl crypto) + set(BRPC_DEPS ${EXTERNAL_BRPC_DEPS}) endif() if(WITH_PSLIB) if(WITH_PSLIB_BRPC) set(BRPC_DEPS pslib_brpc) elseif(NOT WITH_HETERPS) - set(BRPC_DEPS brpc ssl crypto) + set(BRPC_DEPS ${EXTERNAL_BRPC_DEPS}) endif() if(WITH_ARM_BRPC) set(BRPC_DEPS arm_brpc) @@ -833,7 +833,7 @@ if(WITH_DISTRIBUTE) heter_service_proto fleet heter_server - brpc + ${${EXTERNAL_BRPC_DEPS}} phi) set(DISTRIBUTE_COMPILE_FLAGS "") if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0) diff --git a/paddle/fluid/framework/async_executor.cc b/paddle/fluid/framework/async_executor.cc index c5d898beba1..055381f0d58 100644 --- a/paddle/fluid/framework/async_executor.cc +++ b/paddle/fluid/framework/async_executor.cc @@ -14,7 +14,6 @@ limitations under the License. */ #include "paddle/fluid/framework/async_executor.h" -#include "gflags/gflags.h" #include "google/protobuf/io/zero_copy_stream_impl.h" #include "google/protobuf/message.h" #include "google/protobuf/text_format.h" @@ -32,6 +31,7 @@ limitations under the License. 
*/ #include "paddle/fluid/inference/io.h" #include "paddle/fluid/platform/place.h" #include "paddle/fluid/pybind/pybind.h" +#include "paddle/utils/flags.h" // phi #include "paddle/phi/kernels/declarations.h" diff --git a/paddle/fluid/framework/copy_same_tensor_test.cc b/paddle/fluid/framework/copy_same_tensor_test.cc index 10e0b76f004..9b892c0c1b0 100644 --- a/paddle/fluid/framework/copy_same_tensor_test.cc +++ b/paddle/fluid/framework/copy_same_tensor_test.cc @@ -16,15 +16,15 @@ #include -#include "gflags/gflags.h" #include "gtest/gtest.h" #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/place.h" #include "paddle/phi/core/ddim.h" +#include "paddle/utils/flags.h" -DECLARE_bool(use_system_allocator); +PD_DECLARE_bool(use_system_allocator); namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/details/bkcl_op_handle.h b/paddle/fluid/framework/details/bkcl_op_handle.h index 4ca8bf4cb58..8a5afcf04bf 100644 --- a/paddle/fluid/framework/details/bkcl_op_handle.h +++ b/paddle/fluid/framework/details/bkcl_op_handle.h @@ -24,7 +24,7 @@ #include "paddle/fluid/platform/device/xpu/bkcl_helper.h" #include "xpu/bkcl.h" -DECLARE_bool(sync_bkcl_allreduce); +PD_DECLARE_bool(sync_bkcl_allreduce); namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/details/build_strategy.cc b/paddle/fluid/framework/details/build_strategy.cc index d295ff6ad5a..5a6f4e6e70d 100644 --- a/paddle/fluid/framework/details/build_strategy.cc +++ b/paddle/fluid/framework/details/build_strategy.cc @@ -22,10 +22,10 @@ limitations under the License. */ #include "paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.h" #include "paddle/phi/core/flags.h" -DECLARE_bool(convert_all_blocks); +PD_DECLARE_bool(convert_all_blocks); PHI_DECLARE_bool(use_mkldnn); #ifdef PADDLE_WITH_CINN -DECLARE_bool(use_cinn); +PD_DECLARE_bool(use_cinn); #endif namespace paddle { diff --git a/paddle/fluid/framework/details/build_strategy_test.cc b/paddle/fluid/framework/details/build_strategy_test.cc index 7ec7d93ee66..0990f134b3e 100644 --- a/paddle/fluid/framework/details/build_strategy_test.cc +++ b/paddle/fluid/framework/details/build_strategy_test.cc @@ -30,7 +30,7 @@ #include "paddle/fluid/framework/var_type_inference.h" #include "paddle/fluid/platform/place.h" -DECLARE_bool(convert_all_blocks); +PD_DECLARE_bool(convert_all_blocks); namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/details/fused_all_reduce_op_handle.cc b/paddle/fluid/framework/details/fused_all_reduce_op_handle.cc index 66428661503..a075b4702e9 100644 --- a/paddle/fluid/framework/details/fused_all_reduce_op_handle.cc +++ b/paddle/fluid/framework/details/fused_all_reduce_op_handle.cc @@ -21,7 +21,7 @@ #include "paddle/phi/backends/device_memory_aligment.h" #include "paddle/phi/core/flags.h" -DEFINE_bool(skip_fused_all_reduce_check, false, ""); // NOLINT +PD_DEFINE_bool(skip_fused_all_reduce_check, false, ""); // NOLINT PHI_DECLARE_bool(allreduce_record_one_event); namespace paddle { diff --git a/paddle/fluid/framework/executor.cc b/paddle/fluid/framework/executor.cc index 50c8cc926dc..f18705ef099 100644 --- a/paddle/fluid/framework/executor.cc +++ b/paddle/fluid/framework/executor.cc @@ -31,7 +31,7 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/executor_gc_helper.h" #include "paddle/phi/core/flags.h" -DECLARE_bool(benchmark); +PD_DECLARE_bool(benchmark); PHI_DECLARE_bool(use_mkldnn); namespace paddle { diff --git a/paddle/fluid/framework/executor_thread_worker.cc b/paddle/fluid/framework/executor_thread_worker.cc index 4bbcba2151b..a3fb850373a 100644 --- a/paddle/fluid/framework/executor_thread_worker.cc +++ b/paddle/fluid/framework/executor_thread_worker.cc @@ -17,7 +17,6 @@ limitations under the License. */ #include #include -#include "gflags/gflags.h" #include "google/protobuf/io/zero_copy_stream_impl.h" #include "google/protobuf/message.h" #include "google/protobuf/text_format.h" @@ -34,6 +33,7 @@ limitations under the License. */ #include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/timer.h" #include "paddle/fluid/pybind/pybind.h" +#include "paddle/utils/flags.h" // phi #include "paddle/phi/kernels/declarations.h" diff --git a/paddle/fluid/framework/fleet/CMakeLists.txt b/paddle/fluid/framework/fleet/CMakeLists.txt index 7ebc58e61b5..e2ba9c6d4f4 100644 --- a/paddle/fluid/framework/fleet/CMakeLists.txt +++ b/paddle/fluid/framework/fleet/CMakeLists.txt @@ -3,7 +3,7 @@ if(WITH_PSLIB) set(BRPC_DEPS pslib_brpc) else() if(NOT WITH_HETERPS) - set(BRPC_DEPS brpc) + set(BRPC_DEPS ${EXTERNAL_BRPC_DEPS}) endif() endif() cc_library( diff --git a/paddle/fluid/framework/garbage_collector.cc b/paddle/fluid/framework/garbage_collector.cc index 3a514d26ddb..01f1baf9be1 100644 --- a/paddle/fluid/framework/garbage_collector.cc +++ b/paddle/fluid/framework/garbage_collector.cc @@ -16,10 +16,10 @@ #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #include "paddle/fluid/platform/cuda_device_guard.h" #endif -#include "gflags/gflags.h" #include "paddle/fluid/framework/garbage_collector.h" #include "paddle/fluid/platform/device/device_wrapper.h" #include "paddle/phi/core/flags.h" +#include "paddle/utils/flags.h" PHI_DECLARE_double(eager_delete_tensor_gb); PHI_DECLARE_double(memory_fraction_of_eager_deletion); diff --git a/paddle/fluid/framework/garbage_collector.h b/paddle/fluid/framework/garbage_collector.h index ec5766882e6..fa45bbcbdd2 100644 --- a/paddle/fluid/framework/garbage_collector.h +++ b/paddle/fluid/framework/garbage_collector.h @@ -20,9 +20,9 @@ #include // NOLINT #include -#include "gflags/gflags.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/stream_callback_manager.h" +#include "paddle/utils/flags.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/graph.h b/paddle/fluid/framework/ir/graph.h index 3eb2df7011c..3596f4e0f0e 100644 --- a/paddle/fluid/framework/ir/graph.h +++ b/paddle/fluid/framework/ir/graph.h @@ -14,8 +14,6 @@ limitations under the License. */ #pragma once -#include - #include #include #include @@ -27,8 +25,9 @@ limitations under the License. */ #include "paddle/fluid/platform/enforce.h" #include "paddle/utils/any.h" +#include "paddle/utils/flags.h" -DECLARE_bool(convert_all_blocks); +PD_DECLARE_bool(convert_all_blocks); namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/graph_helper.cc b/paddle/fluid/framework/ir/graph_helper.cc index aee64ba89ac..87a710cd036 100644 --- a/paddle/fluid/framework/ir/graph_helper.cc +++ b/paddle/fluid/framework/ir/graph_helper.cc @@ -29,7 +29,7 @@ limitations under the License. 
*/ #include "paddle/fluid/platform/collective_helper.h" #endif #include "paddle/fluid/platform/flags.h" -DECLARE_bool(convert_all_blocks); +PD_DECLARE_bool(convert_all_blocks); PADDLE_DEFINE_EXPORTED_string(print_sub_graph_dir, "", "FLAGS_print_sub_graph_dir is used " diff --git a/paddle/fluid/framework/ir/graph_to_program_pass.cc b/paddle/fluid/framework/ir/graph_to_program_pass.cc index f57cdd9d974..e1ed5ff5b04 100644 --- a/paddle/fluid/framework/ir/graph_to_program_pass.cc +++ b/paddle/fluid/framework/ir/graph_to_program_pass.cc @@ -14,11 +14,10 @@ limitations under the License. */ #include "paddle/fluid/framework/ir/graph_to_program_pass.h" -#include - #include #include "paddle/fluid/framework/op_proto_maker.h" +#include "paddle/utils/flags.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/new_executor/executor_statistics.cc b/paddle/fluid/framework/new_executor/executor_statistics.cc index 14436e9c763..9d789c9957e 100644 --- a/paddle/fluid/framework/new_executor/executor_statistics.cc +++ b/paddle/fluid/framework/new_executor/executor_statistics.cc @@ -28,7 +28,7 @@ #include "paddle/fluid/platform/os_info.h" #include "paddle/fluid/platform/profiler/utils.h" -DECLARE_bool(use_stream_safe_cuda_allocator); +PD_DECLARE_bool(use_stream_safe_cuda_allocator); PADDLE_DEFINE_EXPORTED_string(static_executor_perfstat_filepath, "", "FLAGS_static_executor_perfstat_filepath " diff --git a/paddle/fluid/framework/new_executor/interpreter/dependency_builder.h b/paddle/fluid/framework/new_executor/interpreter/dependency_builder.h index 4e3fb8d1b24..18a26ea770c 100644 --- a/paddle/fluid/framework/new_executor/interpreter/dependency_builder.h +++ b/paddle/fluid/framework/new_executor/interpreter/dependency_builder.h @@ -19,7 +19,7 @@ #include "paddle/fluid/framework/new_executor/new_executor_defs.h" -DECLARE_bool(new_executor_sequential_run); +PD_DECLARE_bool(new_executor_sequential_run); namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/new_executor/interpreter/execution_config.cc b/paddle/fluid/framework/new_executor/interpreter/execution_config.cc index 1e6a6f02e22..cf3195bb8c2 100644 --- a/paddle/fluid/framework/new_executor/interpreter/execution_config.cc +++ b/paddle/fluid/framework/new_executor/interpreter/execution_config.cc @@ -22,7 +22,7 @@ #include "paddle/phi/backends/gpu/gpu_info.h" #include "paddle/phi/backends/xpu/xpu_info.h" -DECLARE_bool(new_executor_serial_run); +PD_DECLARE_bool(new_executor_serial_run); namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/new_executor/interpreter_base_impl.h b/paddle/fluid/framework/new_executor/interpreter_base_impl.h index 934dd44771e..88ac481d16c 100644 --- a/paddle/fluid/framework/new_executor/interpreter_base_impl.h +++ b/paddle/fluid/framework/new_executor/interpreter_base_impl.h @@ -20,8 +20,8 @@ #include #include #include -#include "gflags/gflags.h" #include "paddle/fluid/platform/flags.h" +#include "paddle/utils/flags.h" #include "paddle/fluid/framework/details/exception_holder.h" #include "paddle/fluid/framework/new_executor/garbage_collector/garbage_collector.h" @@ -38,14 +38,14 @@ #include "paddle/fluid/platform/device_event.h" #include "paddle/phi/backends/device_manager.h" -DECLARE_bool(new_executor_serial_run); -DECLARE_bool(new_executor_static_build); -DECLARE_bool(new_executor_use_inplace); -DECLARE_bool(new_executor_use_local_scope); +PD_DECLARE_bool(new_executor_serial_run); +PD_DECLARE_bool(new_executor_static_build); 
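Note on the pattern used throughout these hunks (the remaining declarations of this header continue below): flag declarations migrate from gflags' DECLARE_*/DEFINE_* macros to the PD_-prefixed equivalents from paddle/utils/flags.h. A minimal sketch of the intended define-once, declare-everywhere-else usage; the flag name here is hypothetical, not one from the patch:

// flag_owner.cc -- exactly one translation unit defines the flag.
#include "paddle/utils/flags.h"
PD_DEFINE_bool(demo_fast_path, false, "Enable the demo fast path.");

// flag_user.cc -- any other translation unit declares the same flag and
// then reads the generated FLAGS_<name> variable.
#include "paddle/utils/flags.h"
PD_DECLARE_bool(demo_fast_path);
bool UseFastPath() { return FLAGS_demo_fast_path; }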
+PD_DECLARE_bool(new_executor_use_inplace); +PD_DECLARE_bool(new_executor_use_local_scope); PHI_DECLARE_bool(check_nan_inf); -DECLARE_bool(benchmark); -DECLARE_uint64(executor_log_deps_every_microseconds); +PD_DECLARE_bool(benchmark); +PD_DECLARE_uint64(executor_log_deps_every_microseconds); PHI_DECLARE_bool(new_executor_use_cuda_graph); PHI_DECLARE_bool(enable_new_ir_in_executor); #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) diff --git a/paddle/fluid/framework/new_executor/interpretercore.h b/paddle/fluid/framework/new_executor/interpretercore.h index 191f0b92eb8..f01c12b27c3 100644 --- a/paddle/fluid/framework/new_executor/interpretercore.h +++ b/paddle/fluid/framework/new_executor/interpretercore.h @@ -15,7 +15,7 @@ #include "paddle/fluid/framework/new_executor/interpreter_base_impl.h" -DECLARE_bool(new_executor_use_local_scope); +PD_DECLARE_bool(new_executor_use_local_scope); namespace ir { class Program; diff --git a/paddle/fluid/framework/new_executor/new_ir_interpreter.cc b/paddle/fluid/framework/new_executor/new_ir_interpreter.cc index df9f398770e..e026e914adb 100644 --- a/paddle/fluid/framework/new_executor/new_ir_interpreter.cc +++ b/paddle/fluid/framework/new_executor/new_ir_interpreter.cc @@ -16,7 +16,7 @@ #include -#include "gflags/gflags.h" +#include "paddle/utils/flags.h" #include "paddle/fluid/framework/details/nan_inf_utils.h" #include "paddle/fluid/framework/details/share_tensor_buffer_functor.h" diff --git a/paddle/fluid/framework/op_kernel_type.h b/paddle/fluid/framework/op_kernel_type.h index c7193c68d3c..558ae8f9b90 100644 --- a/paddle/fluid/framework/op_kernel_type.h +++ b/paddle/fluid/framework/op_kernel_type.h @@ -16,7 +16,6 @@ limitations under the License. */ #include -#include "gflags/gflags.h" #include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/library_type.h" @@ -25,8 +24,9 @@ limitations under the License. */ #include "paddle/phi/core/device_context.h" #include "paddle/phi/core/enforce.h" #include "paddle/phi/core/kernel_factory.h" +#include "paddle/utils/flags.h" -DECLARE_bool(use_stride_kernel); +PD_DECLARE_bool(use_stride_kernel); namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index 65a150b7ccb..0c03486fdd7 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -17,7 +17,6 @@ limitations under the License. */ #include #include -#include "gflags/gflags.h" #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/data_transform.h" #include "paddle/fluid/framework/data_type_transform.h" @@ -43,6 +42,7 @@ limitations under the License. 
*/ #include "paddle/phi/core/kernel_context.h" #include "paddle/phi/core/kernel_factory.h" #include "paddle/phi/ops/compat/signatures.h" +#include "paddle/utils/flags.h" namespace phi { class DenseTensor; @@ -62,9 +62,9 @@ class DenseTensor; #include "paddle/fluid/platform/device/gpu/gpu_dnn.h" #endif -DECLARE_bool(benchmark); +PD_DECLARE_bool(benchmark); PHI_DECLARE_bool(check_nan_inf); -DECLARE_bool(enable_unused_var_check); +PD_DECLARE_bool(enable_unused_var_check); PHI_DECLARE_bool(run_kp_kernel); PHI_DECLARE_bool(enable_host_event_recorder_hook); diff --git a/paddle/fluid/framework/operator_test.cc b/paddle/fluid/framework/operator_test.cc index 1d57efd875f..baca5b3f067 100644 --- a/paddle/fluid/framework/operator_test.cc +++ b/paddle/fluid/framework/operator_test.cc @@ -19,7 +19,7 @@ limitations under the License. */ #include "paddle/fluid/platform/errors.h" #include "paddle/fluid/platform/init.h" -DECLARE_bool(enable_unused_var_check); +PD_DECLARE_bool(enable_unused_var_check); namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/paddle2cinn/build_cinn_pass.cc b/paddle/fluid/framework/paddle2cinn/build_cinn_pass.cc index 9e2da446b26..fd0dcff440b 100644 --- a/paddle/fluid/framework/paddle2cinn/build_cinn_pass.cc +++ b/paddle/fluid/framework/paddle2cinn/build_cinn_pass.cc @@ -24,7 +24,6 @@ limitations under the License. */ #include #include -#include "gflags/gflags.h" #include "glog/logging.h" #include "paddle/cinn/frontend/op_mapper_registry.h" #include "paddle/cinn/frontend/op_mappers/use_op_mappers.h" @@ -38,9 +37,10 @@ limitations under the License. */ #include "paddle/fluid/operators/cinn/cinn_launch_op.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/errors.h" +#include "paddle/utils/flags.h" -DECLARE_string(allow_cinn_ops); -DECLARE_string(deny_cinn_ops); +PD_DECLARE_string(allow_cinn_ops); +PD_DECLARE_string(deny_cinn_ops); namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc b/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc index 29ea6098088..e0ddafd37da 100644 --- a/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc +++ b/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc @@ -22,7 +22,6 @@ #include #include -#include "gflags/gflags.h" #include "paddle/cinn/auto_schedule/auto_tuner.h" #include "paddle/cinn/auto_schedule/tuning.h" #include "paddle/cinn/common/target.h" @@ -52,6 +51,7 @@ #include "paddle/ir/core/program.h" #include "paddle/ir/core/value.h" #include "paddle/phi/core/flags.h" +#include "paddle/utils/flags.h" PHI_DECLARE_bool(enable_pe_launch_cinn); PHI_DECLARE_bool(enable_cinn_auto_tune); diff --git a/paddle/fluid/framework/paddle2cinn/cinn_compiler_test.cc b/paddle/fluid/framework/paddle2cinn/cinn_compiler_test.cc index 4c66bc787ef..519b7811574 100644 --- a/paddle/fluid/framework/paddle2cinn/cinn_compiler_test.cc +++ b/paddle/fluid/framework/paddle2cinn/cinn_compiler_test.cc @@ -23,7 +23,6 @@ #include #include -#include "gflags/gflags.h" #include "glog/logging.h" #include "gtest/gtest.h" #include "paddle/cinn/common/target.h" @@ -38,6 +37,7 @@ #include "paddle/fluid/platform/place.h" #include "paddle/phi/core/ddim.h" #include "paddle/phi/core/flags.h" +#include "paddle/utils/flags.h" PHI_DECLARE_string(allow_cinn_ops); PHI_DECLARE_string(deny_cinn_ops); diff --git a/paddle/fluid/framework/scope.cc b/paddle/fluid/framework/scope.cc index 36b40657bb2..744ce8923a2 100644 --- a/paddle/fluid/framework/scope.cc +++ b/paddle/fluid/framework/scope.cc @@ -17,7 
+17,7 @@ limitations under the License. */ #include "glog/logging.h" #include "paddle/fluid/framework/threadpool.h" #include "paddle/fluid/platform/flags.h" -DECLARE_bool(benchmark); +PD_DECLARE_bool(benchmark); PADDLE_DEFINE_EXPORTED_bool( eager_delete_scope, diff --git a/paddle/fluid/framework/unused_var_check.h b/paddle/fluid/framework/unused_var_check.h index cc4977e439c..55a3a020a06 100644 --- a/paddle/fluid/framework/unused_var_check.h +++ b/paddle/fluid/framework/unused_var_check.h @@ -19,7 +19,7 @@ limitations under the License. */ #include #include -#include "gflags/gflags.h" +#include "paddle/utils/flags.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/imperative/prepared_operator.cc b/paddle/fluid/imperative/prepared_operator.cc index 11a4d37d799..d336488a423 100644 --- a/paddle/fluid/imperative/prepared_operator.cc +++ b/paddle/fluid/imperative/prepared_operator.cc @@ -36,7 +36,7 @@ #include "paddle/phi/core/flags.h" PHI_DECLARE_bool(check_nan_inf); -DECLARE_bool(benchmark); +PD_DECLARE_bool(benchmark); PHI_DECLARE_bool(run_kp_kernel); namespace paddle { diff --git a/paddle/fluid/imperative/tracer.cc b/paddle/fluid/imperative/tracer.cc index f1374bc8f7b..bd3bbf2108e 100644 --- a/paddle/fluid/imperative/tracer.cc +++ b/paddle/fluid/imperative/tracer.cc @@ -38,7 +38,7 @@ PHI_DECLARE_bool(use_mkldnn); PHI_DECLARE_string(tracer_mkldnn_ops_on); PHI_DECLARE_string(tracer_mkldnn_ops_off); -DECLARE_bool(use_stride_kernel); +PD_DECLARE_bool(use_stride_kernel); namespace paddle { namespace imperative { diff --git a/paddle/fluid/imperative/tracer.h b/paddle/fluid/imperative/tracer.h index 5e426724aae..49b94c743fd 100644 --- a/paddle/fluid/imperative/tracer.h +++ b/paddle/fluid/imperative/tracer.h @@ -32,7 +32,7 @@ #include "paddle/fluid/platform/macros.h" #include "paddle/phi/core/compat/arg_map_context.h" -DECLARE_bool(use_stride_kernel); +PD_DECLARE_bool(use_stride_kernel); namespace paddle { namespace imperative { diff --git a/paddle/fluid/inference/CMakeLists.txt b/paddle/fluid/inference/CMakeLists.txt index 7b6175a9756..48c9f79f34d 100644 --- a/paddle/fluid/inference/CMakeLists.txt +++ b/paddle/fluid/inference/CMakeLists.txt @@ -37,6 +37,10 @@ get_property(ir_targets GLOBAL PROPERTY IR_TARGETS) get_property(not_infer_modules GLOBAL PROPERTY NOT_INFER_MODULES) set(utils_modules pretty_log string_helper benchmark utf8proc) +if(NOT WITH_GFLAGS) + set(utils_modules ${utils_modules} paddle_flags) +endif() + add_subdirectory(api) # Create static inference library if needed diff --git a/paddle/fluid/inference/analysis/analyzer.h b/paddle/fluid/inference/analysis/analyzer.h index 3c053283666..60f86ba10eb 100644 --- a/paddle/fluid/inference/analysis/analyzer.h +++ b/paddle/fluid/inference/analysis/analyzer.h @@ -37,8 +37,8 @@ limitations under the License. 
*/ #include -#include "gflags/gflags.h" #include "paddle/fluid/inference/analysis/analysis_pass.h" +#include "paddle/utils/flags.h" namespace paddle { namespace inference { diff --git a/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc b/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc index fa35ffc45c2..221b25cae00 100644 --- a/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc +++ b/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc @@ -27,7 +27,7 @@ #include "paddle/fluid/platform/place.h" #include "paddle/phi/core/dense_tensor.h" -DEFINE_bool( // NOLINT +PD_DEFINE_bool( // NOLINT custom_model_save_cpu, false, "Keep old mode for developers, the model is saved on cpu not device."); diff --git a/paddle/fluid/inference/analysis/ut_helper.h b/paddle/fluid/inference/analysis/ut_helper.h index 6c7690a4779..29d123d44ad 100644 --- a/paddle/fluid/inference/analysis/ut_helper.h +++ b/paddle/fluid/inference/analysis/ut_helper.h @@ -18,9 +18,9 @@ limitations under the License. */ #include #include -#include "gflags/gflags.h" #include "paddle/fluid/framework/executor.h" #include "paddle/fluid/inference/analysis/helper.h" +#include "paddle/utils/flags.h" namespace paddle { namespace inference { @@ -30,7 +30,7 @@ extern void ReadBinaryFile(const std::string& filename, std::string* contents); namespace analysis { -DEFINE_string(inference_model_dir, "", "inference test model dir"); +PD_DEFINE_string(inference_model_dir, "", "inference test model dir"); } // namespace analysis } // namespace inference diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 7d60d203f2b..1fb7e2c1571 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -1708,10 +1708,10 @@ CreatePaddlePredictor( auto SetGflags = [](const AnalysisConfig &config) { auto SetGflag = [](const char *name, const char *value) { - std::string ret = ::GFLAGS_NAMESPACE::SetCommandLineOption(name, value); + bool success = paddle::flags::SetFlagValue(name, value); PADDLE_ENFORCE_EQ( - ret.empty(), - false, + success, + true, platform::errors::InvalidArgument( "Fail to set gflag: %s, please make sure the gflag exists.", name)); @@ -3089,8 +3089,8 @@ std::tuple GetTrtRuntimeVersion() { #endif } -std::string UpdateDllFlag(const char *name, const char *value) { - return paddle::UpdateDllFlag(name, value); +void UpdateDllFlag(const char *name, const char *value) { + paddle::UpdateDllFlag(name, value); } void ConvertToMixedPrecision(const std::string &model_file, diff --git a/paddle/fluid/inference/api/api.cc b/paddle/fluid/inference/api/api.cc index 93b88632984..a15f8be18bf 100644 --- a/paddle/fluid/inference/api/api.cc +++ b/paddle/fluid/inference/api/api.cc @@ -14,13 +14,13 @@ #include -#include "gflags/gflags.h" #include "paddle/fluid/framework/commit.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/inference/api/paddle_inference_api.h" #include "paddle/fluid/inference/api/paddle_pass_builder.h" #include "paddle/fluid/platform/enforce.h" +#include "paddle/utils/flags.h" namespace paddle { @@ -134,20 +134,18 @@ std::string get_version() { return ss.str(); } -std::string UpdateDllFlag(const char *name, const char *value) { +void UpdateDllFlag(const char *name, const char *value) { std::string ret; LOG(WARNING) << "The function \"UpdateDllFlag\" is only used to 
update the flag " "on the Windows shared library"; - ret = ::GFLAGS_NAMESPACE::SetCommandLineOption(name, value); + bool success = paddle::flags::SetFlagValue(name, value); PADDLE_ENFORCE_EQ( - ret.empty(), - false, + success, + true, platform::errors::InvalidArgument( "Fail to update flag: %s, please make sure the flag exists.", name)); - LOG(INFO) << ret; - return ret; } #ifdef PADDLE_WITH_CRYPTO diff --git a/paddle/fluid/inference/api/api_impl.cc b/paddle/fluid/inference/api/api_impl.cc index a2c2d099d77..d318042719a 100644 --- a/paddle/fluid/inference/api/api_impl.cc +++ b/paddle/fluid/inference/api/api_impl.cc @@ -26,7 +26,7 @@ limitations under the License. */ #include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/profiler.h" -DEFINE_bool(profile, false, "Turn on profiler for fluid"); // NOLINT +PD_DEFINE_bool(profile, false, "Turn on profiler for fluid"); // NOLINT namespace paddle { namespace { @@ -373,7 +373,6 @@ CreatePaddlePredictor( std::vector flags; if (config.fraction_of_gpu_memory >= 0.0f || config.fraction_of_gpu_memory <= 0.95f) { - flags.emplace_back("dummpy"); std::string flag = "--fraction_of_gpu_memory_to_use=" + num2str(config.fraction_of_gpu_memory); flags.push_back(flag); diff --git a/paddle/fluid/inference/api/demo_ci/onnxruntime_mobilenet_demo.cc b/paddle/fluid/inference/api/demo_ci/onnxruntime_mobilenet_demo.cc index fb5cee4e050..04310139e5d 100644 --- a/paddle/fluid/inference/api/demo_ci/onnxruntime_mobilenet_demo.cc +++ b/paddle/fluid/inference/api/demo_ci/onnxruntime_mobilenet_demo.cc @@ -87,7 +87,7 @@ void Main() { } // namespace paddle int main(int argc, char** argv) { - ::GFLAGS_NAMESPACE::ParseCommandLineFlags(&argc, &argv, true); + gflags::ParseCommandLineFlags(&argc, &argv, true); paddle::demo::Main(); return 0; } diff --git a/paddle/fluid/inference/api/demo_ci/simple_on_word2vec.cc b/paddle/fluid/inference/api/demo_ci/simple_on_word2vec.cc index 66f2bc7056a..dca147e8353 100644 --- a/paddle/fluid/inference/api/demo_ci/simple_on_word2vec.cc +++ b/paddle/fluid/inference/api/demo_ci/simple_on_word2vec.cc @@ -133,7 +133,7 @@ void MainThreads(int num_threads, bool use_gpu) { } // namespace paddle int main(int argc, char** argv) { - ::GFLAGS_NAMESPACE::ParseCommandLineFlags(&argc, &argv, true); + gflags::ParseCommandLineFlags(&argc, &argv, true); paddle::demo::Main(false /* use_gpu*/); paddle::demo::MainThreads(1, false /* use_gpu*/); paddle::demo::MainThreads(4, false /* use_gpu*/); diff --git a/paddle/fluid/inference/api/demo_ci/trt_mobilenet_demo.cc b/paddle/fluid/inference/api/demo_ci/trt_mobilenet_demo.cc index b6b20a901b2..b0f05d4f268 100644 --- a/paddle/fluid/inference/api/demo_ci/trt_mobilenet_demo.cc +++ b/paddle/fluid/inference/api/demo_ci/trt_mobilenet_demo.cc @@ -73,7 +73,7 @@ void Main() { } // namespace paddle int main(int argc, char** argv) { - ::GFLAGS_NAMESPACE::ParseCommandLineFlags(&argc, &argv, true); + gflags::ParseCommandLineFlags(&argc, &argv, true); paddle::demo::Main(); return 0; } diff --git a/paddle/fluid/inference/api/demo_ci/vis_demo.cc b/paddle/fluid/inference/api/demo_ci/vis_demo.cc index 7850b4edb10..022c1249af0 100644 --- a/paddle/fluid/inference/api/demo_ci/vis_demo.cc +++ b/paddle/fluid/inference/api/demo_ci/vis_demo.cc @@ -28,9 +28,6 @@ DEFINE_string(data, "path of data; each line is a record, format is " "'\t MakeCipher( const std::string& config_file); diff --git a/paddle/fluid/inference/api/paddle_inference_api.h b/paddle/fluid/inference/api/paddle_inference_api.h index 18b8b6dfd43..0366a33a5f5 100644 
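Note: the two call sites above (and the public declaration just below) replace gflags' SetCommandLineOption, which signalled success through a non-empty message string, with paddle::flags::SetFlagValue, which reports it as a bool; UpdateDllFlag consequently loses its std::string return value. A minimal sketch of the new calling convention, with a hypothetical wrapper name:

#include <cstdio>
#include "paddle/utils/flags.h"

// Returns true when `name` is a registered flag and the update succeeded.
bool SetFlagOrWarn(const char* name, const char* value) {
  bool ok = paddle::flags::SetFlagValue(name, value);
  if (!ok) std::fprintf(stderr, "flag %s does not exist\n", name);
  return ok;
}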
--- a/paddle/fluid/inference/api/paddle_inference_api.h +++ b/paddle/fluid/inference/api/paddle_inference_api.h @@ -235,7 +235,7 @@ PD_INFER_DECL int GetNumBytesOfDataType(DataType dtype); PD_INFER_DECL std::string GetVersion(); PD_INFER_DECL std::tuple GetTrtCompileVersion(); PD_INFER_DECL std::tuple GetTrtRuntimeVersion(); -PD_INFER_DECL std::string UpdateDllFlag(const char* name, const char* value); +PD_INFER_DECL void UpdateDllFlag(const char* name, const char* value); PD_INFER_DECL void ConvertToMixedPrecision( const std::string& model_file, diff --git a/paddle/fluid/inference/io.cc b/paddle/fluid/inference/io.cc index fe502a15798..000d4f5430e 100644 --- a/paddle/fluid/inference/io.cc +++ b/paddle/fluid/inference/io.cc @@ -29,12 +29,12 @@ limitations under the License. */ // phi #include "paddle/phi/kernels/declarations.h" -DEFINE_string(devices, // NOLINT - "", - "The devices to be used which is joined by comma."); -DEFINE_int32(math_num_threads, - 1, - "Number of threads used to run math functions."); +PD_DEFINE_string(devices, // NOLINT + "", + "The devices to be used which is joined by comma."); +PD_DEFINE_int32(math_num_threads, + 1, + "Number of threads used to run math functions."); namespace paddle { namespace inference { diff --git a/paddle/fluid/inference/paddle_inference.map b/paddle/fluid/inference/paddle_inference.map index 93d90238e34..191f5934166 100644 --- a/paddle/fluid/inference/paddle_inference.map +++ b/paddle/fluid/inference/paddle_inference.map @@ -45,6 +45,7 @@ *paddle::RegisterSymbolsFor*; *paddle::from_blob*; *paddle::InitPhi*; + *paddle::flags*; /* ut needs the following symbol, we need to modify all the ut to hidden such symbols */ diff --git a/paddle/fluid/inference/tensorrt/plugin/trt_plugin.h b/paddle/fluid/inference/tensorrt/plugin/trt_plugin.h index f08a8a75ba4..b939dfaadc9 100644 --- a/paddle/fluid/inference/tensorrt/plugin/trt_plugin.h +++ b/paddle/fluid/inference/tensorrt/plugin/trt_plugin.h @@ -31,7 +31,7 @@ namespace nvinfer1 { class ITensor; } // namespace nvinfer1 -DECLARE_bool(profile); +PD_DECLARE_bool(profile); namespace paddle { namespace inference { diff --git a/paddle/fluid/memory/allocation/allocator_facade_abs_flags_test.cc b/paddle/fluid/memory/allocation/allocator_facade_abs_flags_test.cc index 1e09c43c4f1..21f734c7272 100644 --- a/paddle/fluid/memory/allocation/allocator_facade_abs_flags_test.cc +++ b/paddle/fluid/memory/allocation/allocator_facade_abs_flags_test.cc @@ -22,7 +22,7 @@ PHI_DECLARE_double(fraction_of_gpu_memory_to_use); PHI_DECLARE_double(fraction_of_cuda_pinned_memory_to_use); PHI_DECLARE_uint64(initial_gpu_memory_in_mb); PHI_DECLARE_uint64(reallocate_gpu_memory_in_mb); -DECLARE_int64(gpu_allocator_retry_time); +PD_DECLARE_int64(gpu_allocator_retry_time); #endif PHI_DECLARE_string(allocator_strategy); diff --git a/paddle/fluid/memory/allocation/allocator_facade_frac_flags_test.cc b/paddle/fluid/memory/allocation/allocator_facade_frac_flags_test.cc index 63e3eab3256..049807f6388 100644 --- a/paddle/fluid/memory/allocation/allocator_facade_frac_flags_test.cc +++ b/paddle/fluid/memory/allocation/allocator_facade_frac_flags_test.cc @@ -22,7 +22,7 @@ PHI_DECLARE_double(fraction_of_gpu_memory_to_use); PHI_DECLARE_double(fraction_of_cuda_pinned_memory_to_use); PHI_DECLARE_uint64(initial_gpu_memory_in_mb); PHI_DECLARE_uint64(reallocate_gpu_memory_in_mb); -DECLARE_int64(gpu_allocator_retry_time); +PD_DECLARE_int64(gpu_allocator_retry_time); #endif PHI_DECLARE_string(allocator_strategy); diff --git 
a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_facade_test.cc b/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_facade_test.cc index bfd05b6b323..15dc5055179 100644 --- a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_facade_test.cc +++ b/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_facade_test.cc @@ -26,7 +26,7 @@ #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) PHI_DECLARE_double(fraction_of_gpu_memory_to_use); PHI_DECLARE_double(fraction_of_cuda_pinned_memory_to_use); -DECLARE_int64(gpu_allocator_retry_time); +PD_DECLARE_int64(gpu_allocator_retry_time); #endif PHI_DECLARE_string(allocator_strategy); diff --git a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_test.cc b/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_test.cc index 890ebde2aa3..350cdbd9b4c 100644 --- a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_test.cc +++ b/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_test.cc @@ -19,8 +19,8 @@ #include "gtest/gtest.h" #include "paddle/fluid/memory/allocation/aligned_allocator.h" -DECLARE_bool(free_idle_chunk); -DECLARE_bool(free_when_no_cache_hit); +PD_DECLARE_bool(free_idle_chunk); +PD_DECLARE_bool(free_when_no_cache_hit); namespace paddle { namespace memory { diff --git a/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc b/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc index 09053ec5ded..1493913f5b2 100644 --- a/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc +++ b/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc @@ -41,7 +41,7 @@ PADDLE_DEFINE_EXPORTED_bool( PHI_DECLARE_double(fraction_of_gpu_memory_to_use); PHI_DECLARE_uint64(initial_gpu_memory_in_mb); PHI_DECLARE_uint64(reallocate_gpu_memory_in_mb); -DECLARE_bool(benchmark); +PD_DECLARE_bool(benchmark); namespace paddle { namespace memory { diff --git a/paddle/fluid/operators/pscore/CMakeLists.txt b/paddle/fluid/operators/pscore/CMakeLists.txt index 5a397699951..dea89806bc2 100755 --- a/paddle/fluid/operators/pscore/CMakeLists.txt +++ b/paddle/fluid/operators/pscore/CMakeLists.txt @@ -37,15 +37,9 @@ else() ps_framework_proto framework_proto sendrecv_rpc - brpc - leveldb - ssl - crypto - protobuf + ${EXTERNAL_BRPC_DEPS} phi - glog zlib - snappy device_context) endif() diff --git a/paddle/fluid/operators/tdm_child_op.h b/paddle/fluid/operators/tdm_child_op.h index 0ebac190cda..1a126b0a885 100644 --- a/paddle/fluid/operators/tdm_child_op.h +++ b/paddle/fluid/operators/tdm_child_op.h @@ -21,9 +21,9 @@ #include #include -#include "gflags/gflags.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/core/mixed_vector.h" +#include "paddle/utils/flags.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/tdm_sampler_op.h b/paddle/fluid/operators/tdm_sampler_op.h index 4baff820784..f3b55c4a5cc 100644 --- a/paddle/fluid/operators/tdm_sampler_op.h +++ b/paddle/fluid/operators/tdm_sampler_op.h @@ -21,10 +21,10 @@ #include #include -#include "gflags/gflags.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/sampler.h" #include "paddle/phi/core/mixed_vector.h" +#include "paddle/utils/flags.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/platform/cpu_info_test.cc b/paddle/fluid/platform/cpu_info_test.cc index 6f7ccd03cff..fef343112dc 100644 --- a/paddle/fluid/platform/cpu_info_test.cc +++ b/paddle/fluid/platform/cpu_info_test.cc @@ -15,10 +15,10 @@ #include -#include 
"gflags/gflags.h" #include "gtest/gtest.h" #include "paddle/fluid/string/printf.h" #include "paddle/phi/core/flags.h" +#include "paddle/utils/flags.h" PHI_DECLARE_double(fraction_of_cpu_memory_to_use); diff --git a/paddle/fluid/platform/cuda_graph_with_memory_pool.cc b/paddle/fluid/platform/cuda_graph_with_memory_pool.cc index 5799863f0aa..d15f67e9965 100644 --- a/paddle/fluid/platform/cuda_graph_with_memory_pool.cc +++ b/paddle/fluid/platform/cuda_graph_with_memory_pool.cc @@ -19,7 +19,7 @@ #include "paddle/phi/backends/context_pool.h" #include "paddle/phi/core/flags.h" -DECLARE_bool(use_stream_safe_cuda_allocator); +PD_DECLARE_bool(use_stream_safe_cuda_allocator); PHI_DECLARE_bool(new_executor_use_cuda_graph); namespace paddle { diff --git a/paddle/fluid/platform/device/gpu/gpu_info.cc b/paddle/fluid/platform/device/gpu/gpu_info.cc index 10ba4ce6e1d..ba2494a2f48 100644 --- a/paddle/fluid/platform/device/gpu/gpu_info.cc +++ b/paddle/fluid/platform/device/gpu/gpu_info.cc @@ -20,7 +20,6 @@ limitations under the License. */ #include #include -#include "gflags/gflags.h" #include "paddle/fluid/memory/memory.h" #include "paddle/fluid/platform/cuda_device_guard.h" #include "paddle/fluid/platform/enforce.h" @@ -32,6 +31,7 @@ limitations under the License. */ #include "paddle/fluid/platform/profiler/mem_tracing.h" #include "paddle/fluid/string/split.h" #include "paddle/phi/backends/gpu/gpu_info.h" +#include "paddle/utils/flags.h" #ifdef PADDLE_WITH_HIP #include "paddle/fluid/platform/dynload/miopen.h" diff --git a/paddle/fluid/platform/device/xpu/xpu_info.cc b/paddle/fluid/platform/device/xpu/xpu_info.cc index 3193d5d8819..374166faeb6 100644 --- a/paddle/fluid/platform/device/xpu/xpu_info.cc +++ b/paddle/fluid/platform/device/xpu/xpu_info.cc @@ -14,7 +14,6 @@ limitations under the License. */ #include #include -#include "gflags/gflags.h" #include "paddle/fluid/platform/device/device_wrapper.h" #include "paddle/fluid/platform/device/xpu/enforce_xpu.h" #include "paddle/fluid/platform/device/xpu/xpu_header.h" @@ -23,6 +22,7 @@ limitations under the License. */ #include "paddle/fluid/platform/monitor.h" #include "paddle/fluid/platform/place.h" #include "paddle/phi/backends/xpu/xpu_info.h" +#include "paddle/utils/flags.h" namespace paddle { namespace platform { diff --git a/paddle/fluid/platform/dynload/dynamic_loader.cc b/paddle/fluid/platform/dynload/dynamic_loader.cc index 40f69a87f37..94a96b67cd6 100644 --- a/paddle/fluid/platform/dynload/dynamic_loader.cc +++ b/paddle/fluid/platform/dynload/dynamic_loader.cc @@ -16,8 +16,8 @@ limitations under the License. */ #include #include -#include "gflags/gflags.h" #include "paddle/phi/backends/dynload/dynamic_loader.h" +#include "paddle/utils/flags.h" namespace paddle { namespace platform { diff --git a/paddle/fluid/platform/enforce.h b/paddle/fluid/platform/enforce.h index 425d4939b56..d9c9398461d 100644 --- a/paddle/fluid/platform/enforce.h +++ b/paddle/fluid/platform/enforce.h @@ -60,10 +60,10 @@ limitations under the License. 
*/ #endif #define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h -#include "gflags/gflags.h" #include "glog/logging.h" #include "paddle/fluid/platform/errors.h" #include "paddle/fluid/platform/macros.h" +#include "paddle/utils/flags.h" #include "paddle/fluid/string/printf.h" #include "paddle/fluid/string/to_string.h" diff --git a/paddle/fluid/platform/flags.h b/paddle/fluid/platform/flags.h index b08aececd2b..66d8f9557ef 100644 --- a/paddle/fluid/platform/flags.h +++ b/paddle/fluid/platform/flags.h @@ -23,7 +23,7 @@ #define __PADDLE_DEFINE_EXPORTED_FLAG( \ __name, __is_writable, __cpp_type, __gflag_type, __default_value, __doc) \ - DEFINE_##__gflag_type(__name, __default_value, __doc); /* NOLINT */ \ + PD_DEFINE_##__gflag_type(__name, __default_value, __doc); /* NOLINT */ \ struct __PaddleRegisterFlag_##__name { \ __PaddleRegisterFlag_##__name() { \ using FlagDeclaredType = \ diff --git a/paddle/fluid/platform/init.cc b/paddle/fluid/platform/init.cc index 2cc3dc7124a..4e0803c59ce 100644 --- a/paddle/fluid/platform/init.cc +++ b/paddle/fluid/platform/init.cc @@ -102,8 +102,8 @@ bool InitGflags(std::vector args) { << ", Init commandline: " << line; char **arr = argv.data(); - ::GFLAGS_NAMESPACE::AllowCommandLineReparsing(); - ::GFLAGS_NAMESPACE::ParseCommandLineFlags(&argc, &arr, true); + paddle::flags::AllowUndefinedFlags(); + paddle::flags::ParseCommandLineFlags(&argc, &arr); successed = true; VLOG(1) << "After Parse: argc is " << argc; diff --git a/paddle/fluid/platform/init.h b/paddle/fluid/platform/init.h index 2d5c34002b4..4ff99f2866e 100644 --- a/paddle/fluid/platform/init.h +++ b/paddle/fluid/platform/init.h @@ -17,8 +17,8 @@ limitations under the License. */ #include #include -#include "gflags/gflags.h" #include "glog/logging.h" +#include "paddle/utils/flags.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/platform/profiler.cc b/paddle/fluid/platform/profiler.cc index 54d27cbd7b7..efee8a264bc 100644 --- a/paddle/fluid/platform/profiler.cc +++ b/paddle/fluid/platform/profiler.cc @@ -38,7 +38,9 @@ PADDLE_DEFINE_EXPORTED_bool(enable_rpc_profiler, false, "Enable rpc profiler or not."); -DEFINE_bool(enable_record_memory, false, "enable memory recorder"); // NOLINT +PD_DEFINE_bool(enable_record_memory, + false, + "enable memory recorder"); // NOLINT #if defined(_WIN32) && defined(PHI_SHARED) phi::ProfilerState phi::ProfilerHelper::g_state = phi::ProfilerState::kDisabled; diff --git a/paddle/fluid/platform/profiler/host_tracer.cc b/paddle/fluid/platform/profiler/host_tracer.cc index 6907ccd3b04..56fe468838b 100644 --- a/paddle/fluid/platform/profiler/host_tracer.cc +++ b/paddle/fluid/platform/profiler/host_tracer.cc @@ -17,17 +17,9 @@ #include "glog/logging.h" #include "paddle/fluid/framework/op_proto_maker.h" -#include "paddle/fluid/platform/flags.h" #include "paddle/fluid/platform/profiler/common_event.h" #include "paddle/fluid/platform/profiler/host_event_recorder.h" -// Used to filter events, works like glog VLOG(level). -// RecordEvent will works if host_trace_level >= level. 
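Note: the InitGflags hunk above swaps the gflags parsing entry points for their paddle::flags counterparts (the host_tracer.cc removal resumes below; the deleted flag reappears in profiler.cc). A self-contained sketch of the new parse path, assuming a hypothetical flag:

#include "paddle/utils/flags.h"

PD_DEFINE_int32(demo_verbosity, 0, "Hypothetical example flag.");

int main(int argc, char** argv) {
  // Tolerate unregistered flags, mirroring the old
  // ::GFLAGS_NAMESPACE::AllowCommandLineReparsing() call.
  paddle::flags::AllowUndefinedFlags();
  // Unlike gflags' ParseCommandLineFlags, the paddle version used in this
  // patch takes no trailing remove_flags bool.
  paddle::flags::ParseCommandLineFlags(&argc, &argv);
  return FLAGS_demo_verbosity;
}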
-PADDLE_DEFINE_EXPORTED_int64(host_trace_level, - 1, - "RecordEvent will works " - "if host_trace_level >= level."); - namespace paddle { namespace platform { diff --git a/paddle/fluid/platform/profiler/profiler.cc b/paddle/fluid/platform/profiler/profiler.cc index 78e6443cbac..4f58b0e3cce 100644 --- a/paddle/fluid/platform/profiler/profiler.cc +++ b/paddle/fluid/platform/profiler/profiler.cc @@ -25,6 +25,7 @@ #include "paddle/fluid/platform/device/gpu/gpu_info.h" #endif #include "paddle/fluid/platform/enforce.h" +#include "paddle/fluid/platform/flags.h" #include "paddle/fluid/platform/profiler/cuda_tracer.h" #include "paddle/fluid/platform/profiler/custom_device/custom_tracer.h" #include "paddle/fluid/platform/profiler/extra_info.h" @@ -36,6 +37,13 @@ #include "paddle/phi/backends/device_manager.h" #endif +// Used to filter events, works like glog VLOG(level). +// RecordEvent will works if host_trace_level >= level. +PADDLE_DEFINE_EXPORTED_int64(host_trace_level, + 1, + "RecordEvent will works " + "if host_trace_level >= level."); + namespace paddle { namespace platform { diff --git a/paddle/fluid/platform/profiler/profiler.h b/paddle/fluid/platform/profiler/profiler.h index 28cf7a2d385..4ab98bab530 100644 --- a/paddle/fluid/platform/profiler/profiler.h +++ b/paddle/fluid/platform/profiler/profiler.h @@ -27,7 +27,7 @@ #include "paddle/fluid/platform/profiler/event_python.h" #include "paddle/fluid/platform/profiler/tracer_base.h" -DECLARE_int64(host_trace_level); +PD_DECLARE_int64(host_trace_level); namespace paddle { namespace platform { diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index 42cdd9be306..478efa7a5af 100755 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -57,18 +57,7 @@ if(WITH_PSCORE) endif() endif() if(WITH_RPC) - set(PYBIND_DEPS - ${PYBIND_DEPS} - paddle_rpc - brpc - ssl - crypto - protobuf - zlib - leveldb - snappy - phi - glog) + set(PYBIND_DEPS ${PYBIND_DEPS} paddle_rpc ${EXTERNAL_BRPC_DEPS} zlib phi) endif() if(WITH_GPU OR WITH_ROCM) set(PYBIND_DEPS ${PYBIND_DEPS} dynload_cuda) diff --git a/paddle/fluid/pybind/eager_method.cc b/paddle/fluid/pybind/eager_method.cc index f667602a493..7d3037a076d 100644 --- a/paddle/fluid/pybind/eager_method.cc +++ b/paddle/fluid/pybind/eager_method.cc @@ -67,7 +67,7 @@ typedef SSIZE_T ssize_t; #include "paddle/utils/pybind.h" PHI_DECLARE_bool(set_to_1d); -DECLARE_bool(use_stride_kernel); +PD_DECLARE_bool(use_stride_kernel); namespace paddle { namespace pybind { diff --git a/paddle/fluid/pybind/global_value_getter_setter.cc b/paddle/fluid/pybind/global_value_getter_setter.cc index 94e3ca1ba41..7f8cac9ee1e 100644 --- a/paddle/fluid/pybind/global_value_getter_setter.cc +++ b/paddle/fluid/pybind/global_value_getter_setter.cc @@ -22,12 +22,12 @@ #include #include -#include "gflags/gflags.h" #include "paddle/fluid/framework/python_headers.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/errors.h" #include "paddle/fluid/platform/macros.h" #include "paddle/phi/core/macros.h" +#include "paddle/utils/flags.h" #include "pybind11/stl.h" // FIXME(zengjinle): these 2 flags may be removed by the linker when compiling @@ -41,8 +41,8 @@ PADDLE_FORCE_LINK_FLAG(free_when_no_cache_hit); // NOTE: where are these 2 flags from? 
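Note: the profiler hunks above move the host_trace_level definition out of host_tracer.cc into profiler.cc, while profiler.h keeps only a declaration (the global_value_getter_setter.cc hunk continues below). A sketch of the consumer side; the guard function is hypothetical, but the contract is the one stated in the flag's help text:

#include "paddle/utils/flags.h"

PD_DECLARE_int64(host_trace_level);

// A RecordEvent issued at `level` is kept only when
// FLAGS_host_trace_level >= level.
bool ShouldRecord(int64_t level) { return FLAGS_host_trace_level >= level; }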
#ifdef PADDLE_WITH_DISTRIBUTE -DECLARE_int32(rpc_get_thread_num); -DECLARE_int32(rpc_prefetch_thread_num); +PD_DECLARE_int32(rpc_get_thread_num); +PD_DECLARE_int32(rpc_prefetch_thread_num); #endif namespace paddle { diff --git a/paddle/fluid/pybind/reader_py.cc b/paddle/fluid/pybind/reader_py.cc index 0ae52f7fdd7..0e581e45b59 100644 --- a/paddle/fluid/pybind/reader_py.cc +++ b/paddle/fluid/pybind/reader_py.cc @@ -23,7 +23,6 @@ #include "Python.h" -#include "gflags/gflags.h" #include "paddle/fluid/framework/reader.h" #include "paddle/fluid/imperative/layer.h" #include "paddle/fluid/imperative/tracer.h" @@ -33,6 +32,7 @@ #include "paddle/fluid/platform/place.h" #include "paddle/phi/core/ddim.h" #include "paddle/phi/core/flags.h" +#include "paddle/utils/flags.h" #include "pybind11/stl.h" PHI_DECLARE_bool(reader_queue_speed_test_mode); diff --git a/paddle/fluid/string/pretty_log.h b/paddle/fluid/string/pretty_log.h index d161b2a912f..8e34cbb8ab9 100644 --- a/paddle/fluid/string/pretty_log.h +++ b/paddle/fluid/string/pretty_log.h @@ -18,5 +18,5 @@ #include #include -#include "gflags/gflags.h" +#include "paddle/utils/flags.h" #include "paddle/utils/string/pretty_log.h" diff --git a/paddle/phi/CMakeLists.txt b/paddle/phi/CMakeLists.txt index 83d8006957a..cfbf8fec0ad 100644 --- a/paddle/phi/CMakeLists.txt +++ b/paddle/phi/CMakeLists.txt @@ -36,7 +36,7 @@ endif() set(PHI_DEPS phi_profiler_proto auto_parallel_proto - gflags + ${flags_dep} glog warpctc warprnnt diff --git a/paddle/phi/api/lib/api_gen_utils.cc b/paddle/phi/api/lib/api_gen_utils.cc index 73a1ed4c7fd..80e169fae10 100644 --- a/paddle/phi/api/lib/api_gen_utils.cc +++ b/paddle/phi/api/lib/api_gen_utils.cc @@ -13,11 +13,11 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/phi/api/lib/api_gen_utils.h" -#include "gflags/gflags.h" #include "paddle/phi/core/visit_type.h" #include "paddle/phi/kernels/strided_copy_kernel.h" +#include "paddle/utils/flags.h" -DECLARE_bool(use_stride_kernel); +PD_DECLARE_bool(use_stride_kernel); #include "glog/logging.h" diff --git a/paddle/phi/api/lib/data_transform.cc b/paddle/phi/api/lib/data_transform.cc index 3d717c6dbfe..a8eb379e359 100644 --- a/paddle/phi/api/lib/data_transform.cc +++ b/paddle/phi/api/lib/data_transform.cc @@ -16,7 +16,6 @@ limitations under the License. */ #include "glog/logging.h" -#include "gflags/gflags.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/phi/api/lib/kernel_dispatch.h" #include "paddle/phi/api/lib/utils/allocator.h" @@ -28,8 +27,9 @@ limitations under the License. */ #include "paddle/phi/kernels/cast_kernel.h" #include "paddle/phi/kernels/contiguous_kernel.h" #include "paddle/phi/kernels/transfer_layout_kernel.h" +#include "paddle/utils/flags.h" -DECLARE_bool(use_stride_kernel); +PD_DECLARE_bool(use_stride_kernel); namespace paddle { namespace experimental { diff --git a/paddle/phi/api/profiler/device_tracer.cc b/paddle/phi/api/profiler/device_tracer.cc index b16579491f4..e294130da7b 100644 --- a/paddle/phi/api/profiler/device_tracer.cc +++ b/paddle/phi/api/profiler/device_tracer.cc @@ -21,11 +21,11 @@ limitations under the License. 
*/ #include #include // NOLINT -#include "gflags/gflags.h" #include "glog/logging.h" #include "paddle/phi/core/enforce.h" +#include "paddle/utils/flags.h" -DECLARE_bool(enable_host_event_recorder_hook); +PD_DECLARE_bool(enable_host_event_recorder_hook); namespace phi { diff --git a/paddle/phi/api/yaml/generator/api_gen.py b/paddle/phi/api/yaml/generator/api_gen.py index 50ba2078b04..5164ebda840 100644 --- a/paddle/phi/api/yaml/generator/api_gen.py +++ b/paddle/phi/api/yaml/generator/api_gen.py @@ -360,7 +360,7 @@ def source_include(header_file_path): #include #include "glog/logging.h" -#include "gflags/gflags.h" +#include "paddle/utils/flags.h" #include "paddle/phi/api/lib/api_custom_impl.h" #include "paddle/phi/api/lib/api_gen_utils.h" @@ -379,8 +379,8 @@ def source_include(header_file_path): #include "paddle/phi/api/profiler/event_tracing.h" #include "paddle/phi/api/profiler/supplement_tracing.h" -DECLARE_bool(conv2d_disable_cudnn); -DECLARE_int32(low_precision_op_list); +PD_DECLARE_bool(conv2d_disable_cudnn); +PD_DECLARE_int32(low_precision_op_list); """ diff --git a/paddle/phi/api/yaml/generator/backward_api_gen.py b/paddle/phi/api/yaml/generator/backward_api_gen.py index 183904f642c..9347552dbb1 100644 --- a/paddle/phi/api/yaml/generator/backward_api_gen.py +++ b/paddle/phi/api/yaml/generator/backward_api_gen.py @@ -275,7 +275,7 @@ def source_include(header_file_path, fw_header_file_path): #include #include "glog/logging.h" -#include "gflags/gflags.h" +#include "paddle/utils/flags.h" #include "paddle/phi/api/lib/api_custom_impl.h" #include "paddle/phi/api/lib/api_gen_utils.h" @@ -290,8 +290,8 @@ def source_include(header_file_path, fw_header_file_path): #include "paddle/phi/api/profiler/event_tracing.h" #include "paddle/phi/api/profiler/supplement_tracing.h" -DECLARE_bool(conv2d_disable_cudnn); -DECLARE_int32(low_precision_op_list); +PD_DECLARE_bool(conv2d_disable_cudnn); +PD_DECLARE_int32(low_precision_op_list); """ diff --git a/paddle/phi/api/yaml/generator/dist_bw_api_gen.py b/paddle/phi/api/yaml/generator/dist_bw_api_gen.py index ac6f5f2e916..95a6f94706e 100644 --- a/paddle/phi/api/yaml/generator/dist_bw_api_gen.py +++ b/paddle/phi/api/yaml/generator/dist_bw_api_gen.py @@ -138,7 +138,7 @@ def source_include(header_file_path, fw_header_file_path): #include #include "glog/logging.h" -#include "gflags/gflags.h" +#include "paddle/utils/flags.h" #include "paddle/phi/api/lib/api_custom_impl.h" #include "paddle/phi/api/lib/api_gen_utils.h" @@ -153,8 +153,8 @@ def source_include(header_file_path, fw_header_file_path): #include "paddle/phi/api/profiler/event_tracing.h" #include "paddle/phi/api/profiler/supplement_tracing.h" -DECLARE_bool(conv2d_disable_cudnn); -DECLARE_int32(low_precision_op_list); +PD_DECLARE_bool(conv2d_disable_cudnn); +PD_DECLARE_int32(low_precision_op_list); """ diff --git a/paddle/phi/api/yaml/generator/intermediate_api_gen.py b/paddle/phi/api/yaml/generator/intermediate_api_gen.py index e8f9c1cf751..5c3b6ddd32d 100644 --- a/paddle/phi/api/yaml/generator/intermediate_api_gen.py +++ b/paddle/phi/api/yaml/generator/intermediate_api_gen.py @@ -36,7 +36,7 @@ def source_include(header_file_path): #include #include "glog/logging.h" -#include "gflags/gflags.h" +#include "paddle/utils/flags.h" #include "paddle/phi/api/lib/api_custom_impl.h" #include "paddle/phi/api/lib/api_gen_utils.h" @@ -56,7 +56,7 @@ def source_include(header_file_path): #include "paddle/phi/api/profiler/event_tracing.h" #include "paddle/phi/api/profiler/supplement_tracing.h" 
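Note: the generator hunks above and below change what the Python templates emit into the generated C++ API sources (this template's flag declarations follow below). After the change, a generated file's flag preamble reduces to a sketch like this, using the two flags named in the patch:

// Abridged preamble of a generated API source after the template change.
#include "glog/logging.h"
#include "paddle/utils/flags.h"

PD_DECLARE_bool(conv2d_disable_cudnn);
PD_DECLARE_int32(low_precision_op_list);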
-DECLARE_int32(low_precision_op_list); +PD_DECLARE_int32(low_precision_op_list); """ diff --git a/paddle/phi/api/yaml/generator/sparse_api_gen.py b/paddle/phi/api/yaml/generator/sparse_api_gen.py index af28fcb0948..9a017725d68 100644 --- a/paddle/phi/api/yaml/generator/sparse_api_gen.py +++ b/paddle/phi/api/yaml/generator/sparse_api_gen.py @@ -426,7 +426,7 @@ def source_include(header_file_path): #include #include "glog/logging.h" -#include "gflags/gflags.h" +#include "paddle/utils/flags.h" #include "paddle/phi/api/lib/api_gen_utils.h" #include "paddle/phi/api/lib/data_transform.h" @@ -442,7 +442,7 @@ def source_include(header_file_path): #include "paddle/phi/infermeta/sparse/binary.h" #include "paddle/phi/infermeta/sparse/multiary.h" -DECLARE_int32(low_precision_op_list); +PD_DECLARE_int32(low_precision_op_list); """ diff --git a/paddle/phi/api/yaml/generator/sparse_bw_api_gen.py b/paddle/phi/api/yaml/generator/sparse_bw_api_gen.py index 67a8514bf19..064cf07d0db 100644 --- a/paddle/phi/api/yaml/generator/sparse_bw_api_gen.py +++ b/paddle/phi/api/yaml/generator/sparse_bw_api_gen.py @@ -121,7 +121,7 @@ def source_include(header_file_path): #include #include "glog/logging.h" -#include "gflags/gflags.h" +#include "paddle/utils/flags.h" #include "paddle/phi/api/include/sparse_api.h" #include "paddle/phi/api/lib/api_gen_utils.h" @@ -137,7 +137,7 @@ def source_include(header_file_path): #include "paddle/phi/infermeta/sparse/binary.h" #include "paddle/phi/infermeta/sparse/backward.h" -DECLARE_int32(low_precision_op_list); +PD_DECLARE_int32(low_precision_op_list); """ diff --git a/paddle/phi/api/yaml/generator/strings_api_gen.py b/paddle/phi/api/yaml/generator/strings_api_gen.py index ed0bfe42bd8..4e66bd5f2fd 100644 --- a/paddle/phi/api/yaml/generator/strings_api_gen.py +++ b/paddle/phi/api/yaml/generator/strings_api_gen.py @@ -329,8 +329,8 @@ def source_include(header_file_path): return f""" #include "{header_file_path}" -#include "gflags/gflags.h" #include "glog/logging.h" +#include "paddle/utils/flags.h" #include "paddle/phi/api/lib/api_gen_utils.h" #include "paddle/phi/core/kernel_context.h" @@ -340,7 +340,7 @@ def source_include(header_file_path): #include "paddle/phi/api/lib/kernel_dispatch.h" #include "paddle/phi/core/kernel_registry.h" -DECLARE_int32(low_precision_op_list); +PD_DECLARE_int32(low_precision_op_list); """ diff --git a/paddle/phi/api/yaml/generator/tensor_operants_gen.py b/paddle/phi/api/yaml/generator/tensor_operants_gen.py index f86efeaaefa..1ca80a8bd76 100644 --- a/paddle/phi/api/yaml/generator/tensor_operants_gen.py +++ b/paddle/phi/api/yaml/generator/tensor_operants_gen.py @@ -441,16 +441,16 @@ operants_manager_source_include = """// Generated by paddle/phi/api/yaml/generat #include "paddle/phi/api/include/operants_manager.h" -#include "gflags/gflags.h" #include "glog/logging.h" #include "paddle/phi/core/enforce.h" #include "paddle/phi/core/errors.h" +#include "paddle/utils/flags.h" """ operants_manager_source_start = """ -DECLARE_string(tensor_operants_mode); +PD_DECLARE_string(tensor_operants_mode); namespace paddle { diff --git a/paddle/phi/backends/cpu/cpu_info.cc b/paddle/phi/backends/cpu/cpu_info.cc index 8d1d757dfd5..2eda0104877 100644 --- a/paddle/phi/backends/cpu/cpu_info.cc +++ b/paddle/phi/backends/cpu/cpu_info.cc @@ -35,9 +35,9 @@ limitations under the License. 
*/ #include "paddle/phi/core/flags.h" -DECLARE_double(fraction_of_cpu_memory_to_use); -DECLARE_uint64(initial_cpu_memory_in_mb); -DECLARE_double(fraction_of_cuda_pinned_memory_to_use); +PD_DECLARE_double(fraction_of_cpu_memory_to_use); +PD_DECLARE_uint64(initial_cpu_memory_in_mb); +PD_DECLARE_double(fraction_of_cuda_pinned_memory_to_use); // If use_pinned_memory is true, CPUAllocator calls mlock, which // returns pinned and locked memory as staging areas for data exchange diff --git a/paddle/phi/backends/device_base.cc b/paddle/phi/backends/device_base.cc index 2b7d0411fed..5b6b8fcfc2f 100644 --- a/paddle/phi/backends/device_base.cc +++ b/paddle/phi/backends/device_base.cc @@ -14,13 +14,13 @@ #include "paddle/phi/backends/device_base.h" -#include "gflags/gflags.h" #include "glog/logging.h" #include "paddle/phi/core/enforce.h" +#include "paddle/utils/flags.h" -DECLARE_double(fraction_of_gpu_memory_to_use); -DECLARE_uint64(initial_gpu_memory_in_mb); -DECLARE_uint64(reallocate_gpu_memory_in_mb); +PD_DECLARE_double(fraction_of_gpu_memory_to_use); +PD_DECLARE_uint64(initial_gpu_memory_in_mb); +PD_DECLARE_uint64(reallocate_gpu_memory_in_mb); constexpr static float fraction_reserve_gpu_memory = 0.05f; diff --git a/paddle/phi/backends/dynload/cudnn_frontend.h b/paddle/phi/backends/dynload/cudnn_frontend.h index 4d0b67ce228..ef680d7cba0 100644 --- a/paddle/phi/backends/dynload/cudnn_frontend.h +++ b/paddle/phi/backends/dynload/cudnn_frontend.h @@ -14,13 +14,13 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once -#include "gflags/gflags.h" #include "glog/logging.h" +#include "paddle/utils/flags.h" #include "paddle/phi/backends/dynload/cudnn.h" #include "paddle/phi/backends/gpu/gpu_info.h" -DECLARE_bool(enable_cudnn_frontend); +PD_DECLARE_bool(enable_cudnn_frontend); // Redirect the CUDNN APIs in the cudnn_frontend namespace to // the functions in phi::dynload diff --git a/paddle/phi/backends/dynload/dynamic_loader.cc b/paddle/phi/backends/dynload/dynamic_loader.cc index 4621a9c3dda..6989f32b18e 100644 --- a/paddle/phi/backends/dynload/dynamic_loader.cc +++ b/paddle/phi/backends/dynload/dynamic_loader.cc @@ -103,7 +103,7 @@ PHI_DEFINE_string(rccl_dir, #endif #ifdef PADDLE_WITH_XPU -DEFINE_string(xpti_dir, "", "Specify path for loading libxpti.so."); +PD_DEFINE_string(xpti_dir, "", "Specify path for loading libxpti.so."); #endif namespace phi { diff --git a/paddle/phi/backends/gpu/cuda/cudnn_helper.h b/paddle/phi/backends/gpu/cuda/cudnn_helper.h index 8b6c04090d8..651a4247a12 100644 --- a/paddle/phi/backends/gpu/cuda/cudnn_helper.h +++ b/paddle/phi/backends/gpu/cuda/cudnn_helper.h @@ -17,7 +17,6 @@ limitations under the License. */ #include #include -#include "gflags/gflags.h" #include "paddle/phi/backends/dynload/cudnn.h" #include "paddle/phi/common/bfloat16.h" #include "paddle/phi/common/float16.h" @@ -26,8 +25,9 @@ limitations under the License. */ #include "paddle/phi/core/enforce.h" #include "paddle/phi/core/errors.h" #include "paddle/phi/core/macros.h" +#include "paddle/utils/flags.h" -DECLARE_bool(cudnn_deterministic); +PD_DECLARE_bool(cudnn_deterministic); namespace phi { namespace backends { diff --git a/paddle/phi/backends/gpu/gpu_info.cc b/paddle/phi/backends/gpu/gpu_info.cc index 417ff4c72e8..f6ca9d4168b 100644 --- a/paddle/phi/backends/gpu/gpu_info.cc +++ b/paddle/phi/backends/gpu/gpu_info.cc @@ -17,12 +17,12 @@ limitations under the License. 
*/ #include #include -#include "gflags/gflags.h" #include "glog/logging.h" +#include "paddle/utils/flags.h" #include "paddle/phi/common/memory_utils.h" -DECLARE_string(selected_gpus); +PD_DECLARE_string(selected_gpus); namespace phi { namespace backends { diff --git a/paddle/phi/backends/gpu/rocm/miopen_helper.h b/paddle/phi/backends/gpu/rocm/miopen_helper.h index 095f32ba460..b8ce6e22e93 100644 --- a/paddle/phi/backends/gpu/rocm/miopen_helper.h +++ b/paddle/phi/backends/gpu/rocm/miopen_helper.h @@ -17,7 +17,7 @@ limitations under the License. */ #include #include -#include "gflags/gflags.h" +#include "paddle/utils/flags.h" #include "paddle/phi/backends/dynload/miopen.h" #include "paddle/phi/common/bfloat16.h" @@ -31,7 +31,7 @@ limitations under the License. */ // MIOPEN do not have epslion definition #define CUDNN_BN_MIN_EPSILON 1e-05 -DECLARE_bool(cudnn_deterministic); +PD_DECLARE_bool(cudnn_deterministic); namespace phi { namespace backends { diff --git a/paddle/phi/core/enforce.cc b/paddle/phi/core/enforce.cc index 69bcbf91ef4..f47eb91e97d 100644 --- a/paddle/phi/core/enforce.cc +++ b/paddle/phi/core/enforce.cc @@ -20,16 +20,16 @@ limitations under the License. */ #include #include -#include "gflags/gflags.h" #include "glog/logging.h" #include "paddle/phi/common/scalar.h" #include "paddle/utils/blank.h" +#include "paddle/utils/flags.h" #ifdef PADDLE_WITH_CUDA #include "paddle/phi/core/external_error.pb.h" #endif // PADDLE_WITH_CUDA -DECLARE_int32(call_stack_level); +PD_DECLARE_int32(call_stack_level); namespace egr { class EagerVariable; diff --git a/paddle/phi/core/flags.cc b/paddle/phi/core/flags.cc index 2470981f661..41d2dc8003b 100644 --- a/paddle/phi/core/flags.cc +++ b/paddle/phi/core/flags.cc @@ -1056,19 +1056,21 @@ PHI_DEFINE_EXPORTED_uint64(executor_log_deps_every_microseconds, 0, "Enable new executor log deps every n microseconds"); -DEFINE_int32(record_pool_max_size, - 2000000, - "SlotRecordDataset slot record pool max size"); -DEFINE_int32(slotpool_thread_num, 1, "SlotRecordDataset slot pool thread num"); -DEFINE_bool(enable_slotpool_wait_release, // NOLINT - false, - "enable slotrecord object wait release, default false"); -DEFINE_bool(enable_slotrecord_reset_shrink, // NOLINT - false, - "enable slotrecord object reset shrink memory, default false"); -DEFINE_bool(enable_ins_parser_file, // NOLINT - false, - "enable parser ins file, default false"); +PD_DEFINE_int32(record_pool_max_size, + 2000000, + "SlotRecordDataset slot record pool max size"); +PD_DEFINE_int32(slotpool_thread_num, + 1, + "SlotRecordDataset slot pool thread num"); +PD_DEFINE_bool(enable_slotpool_wait_release, // NOLINT + false, + "enable slotrecord object wait release, default false"); +PD_DEFINE_bool(enable_slotrecord_reset_shrink, // NOLINT + false, + "enable slotrecord object reset shrink memory, default false"); +PD_DEFINE_bool(enable_ins_parser_file, // NOLINT + false, + "enable parser ins file, default false"); PHI_DEFINE_EXPORTED_bool( gpugraph_enable_hbm_table_collision_stat, false, diff --git a/paddle/phi/core/flags.h b/paddle/phi/core/flags.h index 278090ff97d..776e268b201 100644 --- a/paddle/phi/core/flags.h +++ b/paddle/phi/core/flags.h @@ -20,9 +20,8 @@ #include #include -#include "gflags/gflags.h" #include "paddle/phi/core/macros.h" - +#include "paddle/utils/flags.h" #include "paddle/utils/variant.h" #if defined(_WIN32) @@ -33,6 +32,7 @@ #define PHI_IMPORT_FLAG #endif // _WIN32 +#ifdef PADDLE_WITH_GFLAGS // We redefine the gflags' macro for exporting global variable // 
----------------------------DECLARE FLAGS---------------------------- @@ -127,6 +127,23 @@ clstring& FLAGS_##name = *FLAGS_no##name; \ } /* NOLINT */ \ using fLS::FLAGS_##name +#else // PADDLE_WITH_GFLAGS +#define PHI_DEFINE_bool(name, val, txt) PD_DEFINE_bool(name, val, txt) +#define PHI_DEFINE_int32(name, val, txt) PD_DEFINE_int32(name, val, txt) +#define PHI_DEFINE_uint32(name, val, txt) PD_DEFINE_uint32(name, val, txt) +#define PHI_DEFINE_int64(name, val, txt) PD_DEFINE_int64(name, val, txt) +#define PHI_DEFINE_uint64(name, val, txt) PD_DEFINE_uint64(name, val, txt) +#define PHI_DEFINE_double(name, val, txt) PD_DEFINE_double(name, val, txt) +#define PHI_DEFINE_string(name, val, txt) PD_DEFINE_string(name, val, txt) + +#define PHI_DECLARE_bool(name) PD_DECLARE_bool(name) +#define PHI_DECLARE_int32(name) PD_DECLARE_int32(name) +#define PHI_DECLARE_uint32(name) PD_DECLARE_uint32(name) +#define PHI_DECLARE_int64(name) PD_DECLARE_int64(name) +#define PHI_DECLARE_uint64(name) PD_DECLARE_uint64(name) +#define PHI_DECLARE_double(name) PD_DECLARE_double(name) +#define PHI_DECLARE_string(name) PD_DECLARE_string(name) +#endif namespace phi { diff --git a/paddle/phi/core/kernel_factory.cc b/paddle/phi/core/kernel_factory.cc index 543fe4fef76..3ead251e332 100644 --- a/paddle/phi/core/kernel_factory.cc +++ b/paddle/phi/core/kernel_factory.cc @@ -14,9 +14,9 @@ #include "paddle/phi/core/kernel_factory.h" -#include "gflags/gflags.h" #include "glog/logging.h" #include "paddle/phi/core/enforce.h" +#include "paddle/utils/flags.h" #if defined(PADDLE_WITH_XPU) #include "paddle/phi/backends/xpu/xpu_op_list.h" #include "paddle/phi/common/data_type.h" @@ -34,9 +34,9 @@ PADDLE_DEFINE_EXPORTED_bool( true, "Whether to use strdie kernel if op support stride."); -DECLARE_int32(low_precision_op_list); -DECLARE_bool(enable_api_kernel_fallback); -DECLARE_bool(run_kp_kernel); +PD_DECLARE_int32(low_precision_op_list); +PD_DECLARE_bool(enable_api_kernel_fallback); +PD_DECLARE_bool(run_kp_kernel); namespace phi { const static Kernel empty_kernel; // NOLINT diff --git a/paddle/phi/core/threadpool.cc b/paddle/phi/core/threadpool.cc index 5e146023c29..7538087f4e8 100644 --- a/paddle/phi/core/threadpool.cc +++ b/paddle/phi/core/threadpool.cc @@ -16,14 +16,14 @@ #include -#include "gflags/gflags.h" #include "glog/logging.h" #include "paddle/phi/core/enforce.h" +#include "paddle/utils/flags.h" -DECLARE_int32(dist_threadpool_size); -DEFINE_int32(io_threadpool_size, - 100, - "number of threads used for doing IO, default 100"); +PD_DECLARE_int32(dist_threadpool_size); +PD_DEFINE_int32(io_threadpool_size, + 100, + "number of threads used for doing IO, default 100"); namespace phi { diff --git a/paddle/phi/infermeta/unary.cc b/paddle/phi/infermeta/unary.cc index 6e04a149266..4c952bb3cd2 100644 --- a/paddle/phi/infermeta/unary.cc +++ b/paddle/phi/infermeta/unary.cc @@ -17,7 +17,6 @@ limitations under the License. */ #include #include -#include "gflags/gflags.h" #include "paddle/phi/common/data_type.h" #include "paddle/phi/common/type_traits.h" #include "paddle/phi/core/enforce.h" @@ -31,6 +30,7 @@ limitations under the License. 
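One practical consequence of the #else branch above: a flag written against the PHI_* macros compiles unchanged whichever flags backend is active, since under WITH_GFLAGS the macros expand to gflags' DEFINE_*/DECLARE_* machinery and otherwise to paddle's native PD_* macros. A minimal sketch (the flag name here is hypothetical, not one defined by this patch):

#include "paddle/phi/core/flags.h"

// Expands to gflags' DEFINE_bool under WITH_GFLAGS, and to the native
// PD_DEFINE_bool otherwise; FLAGS_use_new_feature is usable either way.
PHI_DEFINE_bool(use_new_feature, false, "Enable the new code path.");

bool ShouldUseNewFeature() { return FLAGS_use_new_feature; }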
*/ #include "paddle/phi/kernels/funcs/unfold_functor.h" #include "paddle/phi/kernels/funcs/unsqueeze.h" #include "paddle/phi/kernels/impl/einsum_impl.h" +#include "paddle/utils/flags.h" namespace phi { diff --git a/paddle/phi/kernels/autotune/cache_cudnn_frontend.h b/paddle/phi/kernels/autotune/cache_cudnn_frontend.h index 095cedccb99..4715efa1f77 100644 --- a/paddle/phi/kernels/autotune/cache_cudnn_frontend.h +++ b/paddle/phi/kernels/autotune/cache_cudnn_frontend.h @@ -22,7 +22,7 @@ #include "paddle/phi/backends/dynload/cudnn_frontend.h" -DECLARE_int32(cudnn_cache_saturation_count); +PD_DECLARE_int32(cudnn_cache_saturation_count); namespace phi { namespace autotune { diff --git a/paddle/phi/kernels/autotune/switch_autotune.cc b/paddle/phi/kernels/autotune/switch_autotune.cc index 3742749b3bf..e287705d08b 100644 --- a/paddle/phi/kernels/autotune/switch_autotune.cc +++ b/paddle/phi/kernels/autotune/switch_autotune.cc @@ -14,10 +14,10 @@ #include "paddle/phi/kernels/autotune/switch_autotune.h" -#include "gflags/gflags.h" #include "glog/logging.h" +#include "paddle/utils/flags.h" -DECLARE_bool(use_autotune); +PD_DECLARE_bool(use_autotune); namespace phi { namespace autotune { diff --git a/paddle/phi/kernels/cpu/adam_kernel.cc b/paddle/phi/kernels/cpu/adam_kernel.cc index 083c9dab740..1a63b779b02 100644 --- a/paddle/phi/kernels/cpu/adam_kernel.cc +++ b/paddle/phi/kernels/cpu/adam_kernel.cc @@ -24,7 +24,7 @@ #include "paddle/phi/kernels/funcs/adam_functors.h" #include "paddle/phi/kernels/funcs/jit/kernels.h" -DECLARE_int32(inner_op_parallelism); +PD_DECLARE_int32(inner_op_parallelism); namespace phi { diff --git a/paddle/phi/kernels/funcs/blas/blas_impl.cu.h b/paddle/phi/kernels/funcs/blas/blas_impl.cu.h index 84eea97da9f..6e4f7e22781 100644 --- a/paddle/phi/kernels/funcs/blas/blas_impl.cu.h +++ b/paddle/phi/kernels/funcs/blas/blas_impl.cu.h @@ -17,8 +17,8 @@ #if defined(__NVCC__) #include #endif -#include "gflags/gflags.h" #include "glog/logging.h" +#include "paddle/utils/flags.h" #include "paddle/phi/backends/dynload/cublas.h" #include "paddle/phi/backends/gpu/gpu_context.h" diff --git a/paddle/phi/kernels/funcs/blas/blas_impl.hip.h b/paddle/phi/kernels/funcs/blas/blas_impl.hip.h index 805a718ab85..224cf4d6cb4 100644 --- a/paddle/phi/kernels/funcs/blas/blas_impl.hip.h +++ b/paddle/phi/kernels/funcs/blas/blas_impl.hip.h @@ -14,13 +14,13 @@ #pragma once -#include "gflags/gflags.h" +#include "paddle/utils/flags.h" #include "paddle/phi/backends/dynload/rocblas.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/kernels/funcs/math_function.h" -DECLARE_bool(enable_cublas_tensor_op_math); +PD_DECLARE_bool(enable_cublas_tensor_op_math); namespace phi { namespace funcs { diff --git a/paddle/phi/kernels/funcs/fused_gemm_epilogue.h b/paddle/phi/kernels/funcs/fused_gemm_epilogue.h index 6f4eb46bf4e..eb5f0fa540f 100644 --- a/paddle/phi/kernels/funcs/fused_gemm_epilogue.h +++ b/paddle/phi/kernels/funcs/fused_gemm_epilogue.h @@ -26,7 +26,6 @@ limitations under the License. */ #if CUDA_VERSION >= 11060 -#include "gflags/gflags.h" #include "glog/logging.h" #include "paddle/phi/backends/all_context.h" #include "paddle/phi/backends/dynload/cublasLt.h" @@ -38,9 +37,10 @@ limitations under the License. 
*/ #include "paddle/phi/core/enforce.h" #include "paddle/phi/core/scope_guard.h" #include "paddle/phi/kernels/funcs/blas/blaslt_impl.cu.h" +#include "paddle/utils/flags.h" #include "paddle/utils/optional.h" -DECLARE_int64(cublaslt_exhaustive_search_times); +PD_DECLARE_int64(cublaslt_exhaustive_search_times); namespace phi { namespace funcs { diff --git a/paddle/phi/kernels/funcs/jit/benchmark.cc b/paddle/phi/kernels/funcs/jit/benchmark.cc index ad7146db0d9..d5d1da73857 100644 --- a/paddle/phi/kernels/funcs/jit/benchmark.cc +++ b/paddle/phi/kernels/funcs/jit/benchmark.cc @@ -15,18 +15,18 @@ #include #include -#include "gflags/gflags.h" #include "glog/logging.h" #include "paddle/phi/api/profiler/device_tracer.h" #include "paddle/phi/common/place.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/enforce.h" #include "paddle/phi/kernels/funcs/jit/kernels.h" +#include "paddle/utils/flags.h" -DEFINE_int32(burning, 10, "Burning times."); -DEFINE_int32(repeat, 3000, "Repeat times."); -DEFINE_int32(max_size, 1000, "The Max size would be tested."); -DEFINE_string(filter, "", "The Benchmark name would be run."); // NOLINT +PD_DEFINE_int32(burning, 10, "Burning times."); +PD_DEFINE_int32(repeat, 3000, "Repeat times."); +PD_DEFINE_int32(max_size, 1000, "The Max size would be tested."); +PD_DEFINE_string(filter, "", "The Benchmark name would be run."); // NOLINT class BenchJITKernel { public: @@ -546,7 +546,7 @@ BENCH_FP32_CPU(VBroadcast); // --max_size: the max size would be tested // --filter: the bench name would be run int main(int argc, char* argv[]) { - ::GFLAGS_NAMESPACE::ParseCommandLineFlags(&argc, &argv, true); + paddle::flags::ParseCommandLineFlags(&argc, &argv); google::InitGoogleLogging(argv[0]); LOG(INFO) << "Burning " << FLAGS_burning << " times, Repeat " << FLAGS_repeat << " times."; diff --git a/paddle/phi/kernels/funcs/jit/gen_base.h b/paddle/phi/kernels/funcs/jit/gen_base.h index dfad19eff34..f96f08005ab 100644 --- a/paddle/phi/kernels/funcs/jit/gen_base.h +++ b/paddle/phi/kernels/funcs/jit/gen_base.h @@ -22,9 +22,9 @@ #include // for _aligned_malloc #endif -#include "gflags/gflags.h" #include "paddle/phi/core/flags.h" #include "paddle/phi/kernels/funcs/jit/kernel_base.h" +#include "paddle/utils/flags.h" PHI_DECLARE_bool(dump_jitcode); diff --git a/paddle/phi/kernels/funcs/jit/test.cc b/paddle/phi/kernels/funcs/jit/test.cc index 0dd5f6c6ba5..d388d95975c 100644 --- a/paddle/phi/kernels/funcs/jit/test.cc +++ b/paddle/phi/kernels/funcs/jit/test.cc @@ -16,15 +16,15 @@ limitations under the License. 
*/ #include #include -#include "gflags/gflags.h" #include "glog/logging.h" #include "gtest/gtest.h" #include "paddle/phi/backends/cpu/cpu_info.h" #include "paddle/phi/common/place.h" #include "paddle/phi/core/enforce.h" #include "paddle/phi/kernels/funcs/jit/kernels.h" +#include "paddle/utils/flags.h" -DEFINE_double(acc, 1e-5, "Test accuracy threshold."); +PD_DEFINE_double(acc, 1e-5, "Test accuracy threshold."); template void RandomVec(const int n, diff --git a/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu b/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu index 0609bf945d9..31bad0987ba 100644 --- a/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu @@ -34,7 +34,7 @@ #define LAUNCH_BOUNDS(BlockDim) #endif -DECLARE_bool(cudnn_batchnorm_spatial_persistent); +PD_DECLARE_bool(cudnn_batchnorm_spatial_persistent); namespace phi { template diff --git a/paddle/phi/kernels/gpu/batch_norm_kernel.cu b/paddle/phi/kernels/gpu/batch_norm_kernel.cu index 1a07e5f0d49..ad276ec6f18 100644 --- a/paddle/phi/kernels/gpu/batch_norm_kernel.cu +++ b/paddle/phi/kernels/gpu/batch_norm_kernel.cu @@ -41,7 +41,7 @@ namespace cub = hipcub; #define LAUNCH_BOUNDS(BlockDim) #endif -DECLARE_bool(cudnn_batchnorm_spatial_persistent); +PD_DECLARE_bool(cudnn_batchnorm_spatial_persistent); namespace phi { diff --git a/paddle/phi/kernels/gpu/c_embedding_grad_kernel.cu b/paddle/phi/kernels/gpu/c_embedding_grad_kernel.cu index aaa5c6865be..66057db357e 100644 --- a/paddle/phi/kernels/gpu/c_embedding_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/c_embedding_grad_kernel.cu @@ -14,7 +14,6 @@ #include "paddle/phi/kernels/c_embedding_kernel.h" -#include "gflags/gflags.h" #include "glog/logging.h" #include "paddle/phi/backends/gpu/gpu_context.h" @@ -22,8 +21,9 @@ #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/eigen/common.h" #include "paddle/phi/kernels/funcs/embedding_grad.h" +#include "paddle/utils/flags.h" -DECLARE_int64(embedding_deterministic); +PD_DECLARE_int64(embedding_deterministic); namespace phi { diff --git a/paddle/phi/kernels/gpu/embedding_grad_kernel.cu b/paddle/phi/kernels/gpu/embedding_grad_kernel.cu index 99ba12b1d62..a7c75e64a46 100644 --- a/paddle/phi/kernels/gpu/embedding_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/embedding_grad_kernel.cu @@ -15,7 +15,6 @@ #include "paddle/phi/kernels/embedding_grad_kernel.h" #include "paddle/phi/kernels/funcs/embedding_grad.h" -#include "gflags/gflags.h" #include "glog/logging.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_primitives.h" @@ -26,8 +25,9 @@ #include "paddle/phi/core/mixed_vector.h" #include "paddle/phi/kernels/funcs/eigen/common.h" #include "paddle/phi/kernels/funcs/embedding_util.h" +#include "paddle/utils/flags.h" -DECLARE_int64(embedding_deterministic); +PD_DECLARE_int64(embedding_deterministic); namespace phi { diff --git a/paddle/phi/kernels/gpu/flash_attn_grad_kernel.cu b/paddle/phi/kernels/gpu/flash_attn_grad_kernel.cu index 60a1c54d726..7b76a5f458d 100644 --- a/paddle/phi/kernels/gpu/flash_attn_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/flash_attn_grad_kernel.cu @@ -24,7 +24,7 @@ #include "paddle/phi/kernels/gpu/flash_attn_utils.h" #include "paddle/phi/kernels/reshape_kernel.h" -DECLARE_bool(cudnn_deterministic); +PD_DECLARE_bool(cudnn_deterministic); namespace phi { diff --git a/paddle/phi/kernels/gpu/flash_attn_kernel.cu b/paddle/phi/kernels/gpu/flash_attn_kernel.cu index bcf8791d3c1..4bd5e28c09f 100644 --- 
a/paddle/phi/kernels/gpu/flash_attn_kernel.cu +++ b/paddle/phi/kernels/gpu/flash_attn_kernel.cu @@ -24,7 +24,7 @@ #include "paddle/phi/kernels/gpu/flash_attn_utils.h" #include "paddle/phi/kernels/reshape_kernel.h" -DECLARE_bool(cudnn_deterministic); +PD_DECLARE_bool(cudnn_deterministic); namespace phi { diff --git a/paddle/phi/kernels/gpu/gelu_grad_kernel.cu b/paddle/phi/kernels/gpu/gelu_grad_kernel.cu index b1ffa921f91..2b847fb216b 100644 --- a/paddle/phi/kernels/gpu/gelu_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/gelu_grad_kernel.cu @@ -21,7 +21,7 @@ #include "paddle/phi/kernels/funcs/broadcast_function.h" #include "paddle/phi/kernels/gpu/gelu_funcs.h" -DECLARE_bool(use_fast_math); +PD_DECLARE_bool(use_fast_math); namespace phi { diff --git a/paddle/phi/kernels/gpu/gelu_kernel.cu b/paddle/phi/kernels/gpu/gelu_kernel.cu index e0792c387d7..8400b5d8cd5 100644 --- a/paddle/phi/kernels/gpu/gelu_kernel.cu +++ b/paddle/phi/kernels/gpu/gelu_kernel.cu @@ -25,7 +25,7 @@ #include "paddle/phi/kernels/gpu/gelu_funcs.h" // clang-format on -DECLARE_bool(use_fast_math); +PD_DECLARE_bool(use_fast_math); namespace phi { diff --git a/paddle/phi/kernels/gpu/index_add_kernel.cu b/paddle/phi/kernels/gpu/index_add_kernel.cu index 9ba5bde7f61..8fd15d5435f 100644 --- a/paddle/phi/kernels/gpu/index_add_kernel.cu +++ b/paddle/phi/kernels/gpu/index_add_kernel.cu @@ -14,15 +14,15 @@ #include "paddle/phi/kernels/index_add_kernel.h" -#include "gflags/gflags.h" #include "glog/logging.h" #include "paddle/phi/backends/gpu/gpu_info.h" #include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/backends/gpu/gpu_primitives.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/utils/data_type.h" +#include "paddle/utils/flags.h" -DECLARE_bool(cudnn_deterministic); +PD_DECLARE_bool(cudnn_deterministic); namespace phi { diff --git a/paddle/phi/kernels/gpu/index_select_grad_kernel.cu b/paddle/phi/kernels/gpu/index_select_grad_kernel.cu index 64e3428cc9a..03f74888fca 100644 --- a/paddle/phi/kernels/gpu/index_select_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/index_select_grad_kernel.cu @@ -14,7 +14,6 @@ #include "paddle/phi/kernels/index_select_grad_kernel.h" -#include "gflags/gflags.h" #include "glog/logging.h" #include "paddle/phi/backends/gpu/gpu_info.h" #include "paddle/phi/backends/gpu/gpu_launch_config.h" @@ -22,8 +21,9 @@ #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/utils/data_type.h" #include "paddle/phi/kernels/funcs/math_function.h" +#include "paddle/utils/flags.h" -DECLARE_bool(cudnn_deterministic); +PD_DECLARE_bool(cudnn_deterministic); namespace phi { diff --git a/paddle/phi/kernels/gpu/layer_norm_kernel.cu b/paddle/phi/kernels/gpu/layer_norm_kernel.cu index c5bb0c288f2..eb85d9ac826 100644 --- a/paddle/phi/kernels/gpu/layer_norm_kernel.cu +++ b/paddle/phi/kernels/gpu/layer_norm_kernel.cu @@ -13,13 +13,13 @@ // limitations under the License. 
#include "paddle/phi/kernels/layer_norm_kernel.h" -#include "gflags/gflags.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/layer_norm_impl.cu.h" #include "paddle/phi/kernels/funcs/layer_norm_util.h" +#include "paddle/utils/flags.h" -DECLARE_bool(use_fast_math); +PD_DECLARE_bool(use_fast_math); namespace phi { diff --git a/paddle/phi/kernels/gpu/randperm_kernel.cu b/paddle/phi/kernels/gpu/randperm_kernel.cu index 2ad512701e0..4c6597b93f9 100644 --- a/paddle/phi/kernels/gpu/randperm_kernel.cu +++ b/paddle/phi/kernels/gpu/randperm_kernel.cu @@ -26,7 +26,6 @@ namespace cub = hipcub; #endif -#include "gflags/gflags.h" #include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/common/amp_type_traits.h" #include "paddle/phi/common/memory_utils.h" @@ -34,6 +33,7 @@ namespace cub = hipcub; #include "paddle/phi/kernels/empty_kernel.h" #include "paddle/phi/kernels/funcs/for_range.h" #include "paddle/phi/kernels/randint_kernel.h" +#include "paddle/utils/flags.h" namespace phi { diff --git a/paddle/phi/kernels/gpu/uniform_inplace_kernel.cu b/paddle/phi/kernels/gpu/uniform_inplace_kernel.cu index 653a64b127a..87bae7fe564 100644 --- a/paddle/phi/kernels/gpu/uniform_inplace_kernel.cu +++ b/paddle/phi/kernels/gpu/uniform_inplace_kernel.cu @@ -16,11 +16,11 @@ limitations under the License. */ #include -#include "gflags/gflags.h" #include "paddle/phi/common/amp_type_traits.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/distribution_helper.h" #include "paddle/phi/kernels/funcs/index_impl.cu.h" +#include "paddle/utils/flags.h" namespace phi { diff --git a/paddle/phi/kernels/gpu/uniform_kernel.cu b/paddle/phi/kernels/gpu/uniform_kernel.cu index 04217db0a74..2a514947bb7 100644 --- a/paddle/phi/kernels/gpu/uniform_kernel.cu +++ b/paddle/phi/kernels/gpu/uniform_kernel.cu @@ -16,10 +16,10 @@ #include -#include "gflags/gflags.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/distribution_helper.h" #include "paddle/phi/kernels/funcs/index_impl.cu.h" +#include "paddle/utils/flags.h" namespace phi { diff --git a/paddle/phi/kernels/gpudnn/conv_gpudnn_info.h b/paddle/phi/kernels/gpudnn/conv_gpudnn_info.h index 9b7b35f4357..0dd3d7f0cde 100644 --- a/paddle/phi/kernels/gpudnn/conv_gpudnn_info.h +++ b/paddle/phi/kernels/gpudnn/conv_gpudnn_info.h @@ -20,9 +20,9 @@ limitations under the License. 
*/ #include "paddle/phi/backends/gpu/gpu_dnn.h" -DECLARE_int64(conv_workspace_size_limit); -DECLARE_bool(cudnn_exhaustive_search); -DECLARE_int64(cudnn_exhaustive_search_times); +PD_DECLARE_int64(conv_workspace_size_limit); +PD_DECLARE_bool(cudnn_exhaustive_search); +PD_DECLARE_int64(cudnn_exhaustive_search_times); namespace phi { diff --git a/paddle/phi/kernels/impl/conv_cudnn_impl.h b/paddle/phi/kernels/impl/conv_cudnn_impl.h index c918eeec831..81a7c772620 100644 --- a/paddle/phi/kernels/impl/conv_cudnn_impl.h +++ b/paddle/phi/kernels/impl/conv_cudnn_impl.h @@ -30,9 +30,9 @@ #include "paddle/phi/kernels/funcs/batch_norm_utils.h" #include "paddle/phi/kernels/funcs/padding.h" -DECLARE_bool(cudnn_deterministic); -DECLARE_int64(conv_workspace_size_limit); -DECLARE_bool(cudnn_exhaustive_search); +PD_DECLARE_bool(cudnn_deterministic); +PD_DECLARE_int64(conv_workspace_size_limit); +PD_DECLARE_bool(cudnn_exhaustive_search); namespace phi { diff --git a/paddle/phi/kernels/impl/einsum_impl.h b/paddle/phi/kernels/impl/einsum_impl.h index d297e786845..e32f64f347f 100644 --- a/paddle/phi/kernels/impl/einsum_impl.h +++ b/paddle/phi/kernels/impl/einsum_impl.h @@ -15,8 +15,8 @@ #include -#include "gflags/gflags.h" #include "glog/logging.h" +#include "paddle/utils/flags.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/kernels/diagonal_kernel.h" @@ -27,7 +27,7 @@ #include "paddle/phi/kernels/transpose_kernel.h" #include "paddle/utils/string/string_helper.h" -DECLARE_bool(einsum_opt); +PD_DECLARE_bool(einsum_opt); namespace phi { diff --git a/paddle/phi/kernels/legacy/gpu/uniform_kernel.cu b/paddle/phi/kernels/legacy/gpu/uniform_kernel.cu index 609238435c9..abf51cf61f2 100644 --- a/paddle/phi/kernels/legacy/gpu/uniform_kernel.cu +++ b/paddle/phi/kernels/legacy/gpu/uniform_kernel.cu @@ -16,10 +16,10 @@ #include -#include "gflags/gflags.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/distribution_helper.h" #include "paddle/phi/kernels/funcs/index_impl.cu.h" +#include "paddle/utils/flags.h" namespace phi { diff --git a/paddle/phi/kernels/selected_rows/cpu/adam_kernel.cc b/paddle/phi/kernels/selected_rows/cpu/adam_kernel.cc index c4732db041e..b93975c188b 100644 --- a/paddle/phi/kernels/selected_rows/cpu/adam_kernel.cc +++ b/paddle/phi/kernels/selected_rows/cpu/adam_kernel.cc @@ -14,8 +14,8 @@ #include "paddle/phi/kernels/selected_rows/adam_kernel.h" -#include "gflags/gflags.h" #include "glog/logging.h" +#include "paddle/utils/flags.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" @@ -24,7 +24,7 @@ #include "paddle/phi/kernels/funcs/adam_functors.h" #include "paddle/phi/kernels/funcs/selected_rows_functor.h" -DECLARE_int32(inner_op_parallelism); +PD_DECLARE_int32(inner_op_parallelism); namespace phi { namespace sr { diff --git a/paddle/testing/paddle_gtest_main.cc b/paddle/testing/paddle_gtest_main.cc index b22f0c3a450..6701693545e 100644 --- a/paddle/testing/paddle_gtest_main.cc +++ b/paddle/testing/paddle_gtest_main.cc @@ -12,15 +12,15 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "gflags/gflags.h" #include "gtest/gtest.h" #include "paddle/fluid/framework/phi_utils.h" #include "paddle/fluid/memory/allocation/allocator_strategy.h" #include "paddle/fluid/platform/init.h" #include "paddle/phi/core/flags.h" +#include "paddle/utils/flags.h" #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) -DECLARE_bool(enable_gpu_memory_usage_log); +PD_DECLARE_bool(enable_gpu_memory_usage_log); #endif int main(int argc, char** argv) { // NOLINT @@ -32,11 +32,8 @@ int main(int argc, char** argv) { // NOLINT } std::vector envs; - std::vector undefok; #if defined(PADDLE_WITH_DISTRIBUTE) && !defined(PADDLE_WITH_PSLIB) - std::string str_max_body_size; - if (::GFLAGS_NAMESPACE::GetCommandLineOption("max_body_size", - &str_max_body_size)) { + if (paddle::flags::FindFlag("max_body_size")) { setenv("FLAGS_max_body_size", "2147483647", 1); envs.push_back("max_body_size"); } @@ -45,18 +42,8 @@ int main(int argc, char** argv) { // NOLINT const auto& flag_map = phi::GetExportedFlagInfoMap(); for (const auto& pair : flag_map) { const std::string& name = pair.second.name; - // NOTE(zhiqiu): some names may not linked in some tests, so add to - // `undefok`. - // One way to handle that is to check each flag item by item, and put it in - // `envs` or `undefok`; - // another way is to add all flags to `envs` and `undeok`, basically it is - // not a good design, - // but it can simplify the procedure of creating new flag and seems no side - // effects. - // see details: https://gflags.github.io/gflags/#special if (pair.second.is_writable) { // means public envs.push_back(name); - undefok.push_back(name); } } @@ -72,20 +59,8 @@ int main(int argc, char** argv) { // NOLINT VLOG(1) << "gtest env_string:" << env_string; } - char* undefok_str = nullptr; - if (!undefok.empty()) { - std::string undefok_string = "--undefok="; - for (auto t : undefok) { - undefok_string += t + ","; - } - undefok_string = undefok_string.substr(0, undefok_string.length() - 1); - undefok_str = strdup(undefok_string.c_str()); - new_argv.push_back(undefok_str); - VLOG(1) << "gtest undefok_string:" << undefok_string; - } - #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - if (strstr(undefok_str, "enable_gpu_memory_usage_log")) { + if (strstr(env_str, "enable_gpu_memory_usage_log")) { VLOG(1) << "Set FLAGS_enable_gpu_memory_usage_log to true"; FLAGS_enable_gpu_memory_usage_log = true; } @@ -93,8 +68,8 @@ int main(int argc, char** argv) { // NOLINT int new_argc = static_cast(new_argv.size()); char** new_argv_address = new_argv.data(); - ::GFLAGS_NAMESPACE::ParseCommandLineFlags( - &new_argc, &new_argv_address, false); + paddle::flags::AllowUndefinedFlags(); + paddle::flags::ParseCommandLineFlags(&new_argc, &new_argv_address); paddle::framework::InitMemoryMethod(); paddle::framework::InitDevices(); paddle::framework::InitDefaultKernelSignatureMap(); @@ -102,6 +77,5 @@ int main(int argc, char** argv) { // NOLINT int ret = RUN_ALL_TESTS(); if (env_str) free(env_str); - if (undefok_str) free(undefok_str); return ret; } diff --git a/paddle/utils/CMakeLists.txt b/paddle/utils/CMakeLists.txt index dbb53a3ac36..bb177e7578b 100644 --- a/paddle/utils/CMakeLists.txt +++ b/paddle/utils/CMakeLists.txt @@ -23,3 +23,11 @@ if(NOT ((NOT WITH_PYTHON) AND ON_INFER)) SRCS pybind.cc DEPS phi) endif() + +if(NOT WITH_GFLAGS) + cc_library(paddle_flags SRCS flags_native.cc) + cc_test( + flags_native_test + SRCS flags_native_test.cc + DEPS paddle_flags) +endif() diff --git a/paddle/utils/flags.h b/paddle/utils/flags.h new file mode 
100644 index 00000000000..5a019f1439b --- /dev/null +++ b/paddle/utils/flags.h @@ -0,0 +1,77 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#ifdef PADDLE_WITH_GFLAGS +#include "gflags/gflags.h" +#else +#include "paddle/utils/flags_native.h" +#endif + +#ifdef PADDLE_WITH_GFLAGS +#define PD_DEFINE_bool(name, val, txt) DEFINE_bool(name, val, txt) +#define PD_DEFINE_int32(name, val, txt) DEFINE_int32(name, val, txt) +#define PD_DEFINE_uint32(name, val, txt) DEFINE_uint32(name, val, txt) +#define PD_DEFINE_int64(name, val, txt) DEFINE_int64(name, val, txt) +#define PD_DEFINE_uint64(name, val, txt) DEFINE_uint64(name, val, txt) +#define PD_DEFINE_double(name, val, txt) DEFINE_double(name, val, txt) +#define PD_DEFINE_string(name, val, txt) DEFINE_string(name, val, txt) + +#define PD_DECLARE_bool(name) DECLARE_bool(name) +#define PD_DECLARE_int32(name) DECLARE_int32(name) +#define PD_DECLARE_uint32(name) DECLARE_uint32(name) +#define PD_DECLARE_int64(name) DECLARE_int64(name) +#define PD_DECLARE_uint64(name) DECLARE_uint64(name) +#define PD_DECLARE_double(name) DECLARE_double(name) +#define PD_DECLARE_string(name) DECLARE_string(name) +#endif + +namespace paddle { +namespace flags { + +#ifdef PADDLE_WITH_GFLAGS +inline void ParseCommandLineFlags(int* argc, char*** argv) { + gflags::ParseCommandLineFlags(argc, argv, true); +} +#else +using paddle::flags::ParseCommandLineFlags; +#endif + +#ifdef PADDLE_WITH_GFLAGS +inline bool SetFlagValue(const char* name, const char* value) { + std::string ret = gflags::SetCommandLineOption(name, value); + return ret.empty() ? false : true; +} +#else +using paddle::flags::SetFlagValue; +#endif + +#ifdef PADDLE_WITH_GFLAGS +inline bool FindFlag(const char* name) { + std::string value; + return gflags::GetCommandLineOption(name, &value); +} +#else +using paddle::flags::FindFlag; +#endif + +#ifdef PADDLE_WITH_GFLAGS +inline void AllowUndefinedFlags() { gflags::AllowCommandLineReparsing(); } +#else +using paddle::flags::AllowUndefinedFlags; +#endif + +} // namespace flags +} // namespace paddle diff --git a/paddle/utils/flags_native.cc b/paddle/utils/flags_native.cc new file mode 100644 index 00000000000..05d90d8adf2 --- /dev/null +++ b/paddle/utils/flags_native.cc @@ -0,0 +1,484 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
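With both branches of paddle/utils/flags.h in place, call sites spell flag handling one way regardless of whether gflags or the native implementation is linked. A minimal sketch of a main() built on the wrapper (the program and flag names are hypothetical):

#include "paddle/utils/flags.h"

PD_DEFINE_int32(num_workers, 4, "Number of worker threads.");

int main(int argc, char** argv) {
  // Mirror what paddle_gtest_main.cc now does: tolerate flags this binary
  // does not define, then parse the remaining arguments.
  paddle::flags::AllowUndefinedFlags();
  paddle::flags::ParseCommandLineFlags(&argc, &argv);
  // After e.g. `./demo --num_workers=8`, FLAGS_num_workers holds 8 here.
  return FLAGS_num_workers > 0 ? 0 : 1;
}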
+ +#include "paddle/utils/flags_native.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace paddle { +namespace flags { + +std::stringstream& ErrorStream() { + static std::stringstream err_ss; + return err_ss; +} + +inline void exit_with_errors() { + std::cerr << ErrorStream().str(); + exit(-1); +} + +#define LOG_FLAG_ERROR(message) \ + ErrorStream() << "paddle flags error: " << message << " (at " << __FILE__ \ + << ":" << __LINE__ << ")" << std::endl + +#define LOG_FLAG_FATAL_ERROR(message) \ + LOG_FLAG_ERROR(message); \ + exit_with_errors() + +enum class FlagType : uint8_t { + BOOL = 0, + INT32 = 1, + UINT32 = 2, + INT64 = 3, + UINT64 = 4, + DOUBLE = 5, + STRING = 6, + UNDEFINED = 7, +}; + +class Flag { + public: + Flag(std::string name, + std::string description, + std::string file, + FlagType type, + const void* default_value, + void* value) + : name_(name), + description_(description), + file_(file), + type_(type), + default_value_(default_value), + value_(value) {} + ~Flag() = default; + + // Summary: --name_: type_, description_ (default: default_value_) + std::string Summary() const; + + void SetValueFromString(const std::string& value); + + private: + friend class FlagRegistry; + + const std::string name_; + const std::string description_; + const std::string file_; + const FlagType type_; + const void* default_value_; + void* value_; +}; + +class FlagRegistry { + public: + static FlagRegistry* Instance() { + static FlagRegistry* global_registry_ = new FlagRegistry(); + return global_registry_; + } + + void RegisterFlag(Flag* flag); + + bool SetFlagValue(const std::string& name, const std::string& value); + + bool HasFlag(const std::string& name) const; + + void PrintAllFlagHelp(std::ostream& os) const; + + private: + FlagRegistry() = default; + + std::map flags_; + + struct FlagCompare { + bool operator()(const Flag* flag1, const Flag* flag2) const { + return flag1->name_ < flag2->name_; + } + }; + + std::map> flags_by_file_; + + std::mutex mutex_; +}; + +template +struct FlagTypeTraits { + static constexpr FlagType Type = FlagType::UNDEFINED; +}; + +#define DEFINE_FLAG_TYPE_TRAITS(type, flag_type) \ + template <> \ + struct FlagTypeTraits { \ + static constexpr FlagType Type = flag_type; \ + } + +DEFINE_FLAG_TYPE_TRAITS(bool, FlagType::BOOL); +DEFINE_FLAG_TYPE_TRAITS(int32_t, FlagType::INT32); +DEFINE_FLAG_TYPE_TRAITS(uint32_t, FlagType::UINT32); +DEFINE_FLAG_TYPE_TRAITS(int64_t, FlagType::INT64); +DEFINE_FLAG_TYPE_TRAITS(uint64_t, FlagType::UINT64); +DEFINE_FLAG_TYPE_TRAITS(double, FlagType::DOUBLE); +DEFINE_FLAG_TYPE_TRAITS(std::string, FlagType::STRING); + +#undef DEFINE_FLAG_TYPE_TRAITS + +template +FlagRegisterer::FlagRegisterer(std::string name, + std::string help, + std::string file, + const T* default_value, + T* value) { + FlagType type = FlagTypeTraits::Type; + Flag* flag = new Flag(name, help, file, type, default_value, value); + FlagRegistry::Instance()->RegisterFlag(flag); +} + +// Instantiate FlagRegisterer for supported types. 
+#define INSTANTIATE_FLAG_REGISTERER(type) \ + template FlagRegisterer::FlagRegisterer(std::string name, \ + std::string help, \ + std::string file, \ + const type* default_value, \ + type* value) + +INSTANTIATE_FLAG_REGISTERER(bool); +INSTANTIATE_FLAG_REGISTERER(int32_t); +INSTANTIATE_FLAG_REGISTERER(uint32_t); +INSTANTIATE_FLAG_REGISTERER(int64_t); +INSTANTIATE_FLAG_REGISTERER(uint64_t); +INSTANTIATE_FLAG_REGISTERER(double); +INSTANTIATE_FLAG_REGISTERER(std::string); + +#undef INSTANTIATE_FLAG_REGISTERER + +std::string FlagType2String(FlagType type) { + switch (type) { + case FlagType::BOOL: + return "bool"; + case FlagType::INT32: + return "int32"; + case FlagType::UINT32: + return "uint32"; + case FlagType::INT64: + return "int64"; + case FlagType::UINT64: + return "uint64"; + case FlagType::DOUBLE: + return "double"; + case FlagType::STRING: + return "string"; + default: + return "undefined"; + } +} + +std::string Value2String(const void* value, FlagType type) { + switch (type) { + case FlagType::BOOL: { + const bool* val = static_cast(value); + return *val ? "true" : "false"; + } + case FlagType::INT32: { + const int32_t* val = static_cast(value); + return std::to_string(*val); + } + case FlagType::UINT32: { + const uint32_t* val = static_cast(value); + return std::to_string(*val); + } + case FlagType::INT64: { + const int64_t* val = static_cast(value); + return std::to_string(*val); + } + case FlagType::UINT64: { + const uint64_t* val = static_cast(value); + return std::to_string(*val); + } + case FlagType::DOUBLE: { + const double* val = static_cast(value); + return std::to_string(*val); + } + case FlagType::STRING: { + const std::string* val = static_cast(value); + return *val; + } + default: + LOG_FLAG_FATAL_ERROR("flag type is undefined."); + return ""; + } +} + +std::string Flag::Summary() const { + return "--" + name_ + ": " + FlagType2String(type_) + ", " + description_ + + " (default: " + Value2String(default_value_, type_) + ")"; +} + +void Flag::SetValueFromString(const std::string& value) { + try { + switch (type_) { + case FlagType::BOOL: { + bool* val = static_cast(value_); + if (value == "true" || value == "True" || value == "TRUE" || + value == "1") { + *val = true; + } else if (value == "false" || value == "False" || value == "FALSE" || + value == "0") { + *val = false; + } else { + throw std::invalid_argument( + ", please use [true, True, TRUE, 1] or [false, False, FALSE, " + "0]."); + } + break; + } + case FlagType::INT32: { + int32_t* val = static_cast(value_); + *val = std::stoi(value); + break; + } + case FlagType::UINT32: { + uint32_t* val = static_cast(value_); + *val = std::stoul(value); + break; + } + case FlagType::INT64: { + int64_t* val = static_cast(value_); + *val = std::stoll(value); + break; + } + case FlagType::UINT64: { + uint64_t* val = static_cast(value_); + *val = std::stoull(value); + break; + } + case FlagType::DOUBLE: { + double* val = static_cast(value_); + *val = std::stod(value); + break; + } + case FlagType::STRING: { + std::string* val = static_cast(value_); + *val = value; + break; + } + default: { + LOG_FLAG_FATAL_ERROR("flag type is undefined."); + } + } + } catch (const std::exception& e) { + std::string error_msg = "value: \"" + value + "\" is invalid for " + + FlagType2String(type_) + " flag \"" + name_ + "\""; + if (type_ == FlagType::BOOL) { + error_msg += e.what(); + } else { + error_msg += "."; + } + LOG_FLAG_ERROR(error_msg); + } +} + +void FlagRegistry::RegisterFlag(Flag* flag) { + auto iter = flags_.find(flag->name_); + if 
(iter != flags_.end()) { + LOG_FLAG_FATAL_ERROR("illegal RegisterFlag, flag \"" + flag->name_ + + "\" has been defined in " + iter->second->file_); + } else { + std::lock_guard lock(mutex_); + flags_[flag->name_] = flag; + flags_by_file_[flag->file_].insert(flag); + } +} + +bool FlagRegistry::SetFlagValue(const std::string& name, + const std::string& value) { + if (HasFlag(name)) { + std::lock_guard lock(mutex_); + flags_[name]->SetValueFromString(value); + return true; + } else { + LOG_FLAG_ERROR("illegal SetFlagValue, flag \"" + name + + "\" is not defined."); + return false; + } +} + +bool FlagRegistry::HasFlag(const std::string& name) const { + return flags_.find(name) != flags_.end(); +} + +void FlagRegistry::PrintAllFlagHelp(std::ostream& os) const { + for (const auto& iter : flags_by_file_) { + os << std::endl << "Flags defined in " << iter.first << ":" << std::endl; + for (const auto& flag : iter.second) { + os << " " << flag->Summary() << std::endl; + } + } + os << std::endl; +} + +void PrintAllFlagHelp(bool to_file, const std::string& file_path) { + if (to_file) { + std::ofstream fout(file_path); + FlagRegistry::Instance()->PrintAllFlagHelp(fout); + } else { + FlagRegistry::Instance()->PrintAllFlagHelp(std::cout); + } +} + +bool SetFlagValue(const std::string& name, const std::string& value) { + return FlagRegistry::Instance()->SetFlagValue(name, value); +} + +bool FindFlag(const std::string& name) { + return FlagRegistry::Instance()->HasFlag(name); +} + +bool GetValueFromEnv(const std::string& name, std::string* value) { + const char* env_var = std::getenv(name.c_str()); + if (env_var == nullptr) { + return false; + } + *value = std::string(env_var); + return true; +} + +void SetFlagsFromEnv(const std::vector& flags, bool error_fatal) { + bool success = true; + for (const std::string& flag_name : flags) { + std::string env_var_name = std::string("FLAGS_") + flag_name; + std::string env_var_value; + if (GetValueFromEnv(env_var_name, &env_var_value)) { + success = + FlagRegistry::Instance()->SetFlagValue(flag_name, env_var_value); + } else if (error_fatal) { + LOG_FLAG_ERROR("environment variable \"" + env_var_name + + "\" is not set."); + success = false; + } + } + if (error_fatal && !success) { + exit_with_errors(); + } +} + +static bool allow_undefined_flags = false; + +void AllowUndefinedFlags() { allow_undefined_flags = true; } + +void ParseCommandLineFlags(int* pargc, char*** pargv) { + assert(*pargc > 0); + size_t argv_num = *pargc - 1; + std::vector argvs(*pargv + 1, *pargv + *pargc); + + std::string arg_format_help = + "please follow the formats: \"--help(h)\", \"--name=value\"" + " or \"--name value\"."; + for (size_t i = 0; i < argv_num; i++) { + const std::string& argv = argvs[i]; + + if (argv.size() < 2 || argv[0] != '-') { + LOG_FLAG_FATAL_ERROR("invalid commandline argument: \"" + argv + "\", " + + arg_format_help); + } + + // parse arg name and value + size_t hyphen_num = argv[1] == '-' ? 2 : 1; + std::string name, value; + size_t split_pos = argv.find('='); + if (split_pos == std::string::npos) { + // the argv format is "--name" or "--name value" + name = argv.substr(hyphen_num); + if (name.empty()) { + LOG_FLAG_FATAL_ERROR("invalid commandline argument: \"" + argv + + "\", " + arg_format_help); + } + + // print help message + if (name == "help" || name == "h") { + FlagRegistry::Instance()->PrintAllFlagHelp(std::cout); + exit(1); + } + + // get the value from next argv. 
+      if (++i == argv_num) {
+        LOG_FLAG_FATAL_ERROR("expected value of flag \"" + name +
+                             "\" but found none.");
+      } else {
+        value = argvs[i];
+      }
+    } else {
+      // the argv format is "--name=value"
+      if (split_pos == hyphen_num || split_pos == argv.size() - 1) {
+        LOG_FLAG_FATAL_ERROR("invalid commandline argument: \"" + argv +
+                             "\", " + arg_format_help);
+      }
+      name = argv.substr(hyphen_num, split_pos - hyphen_num);
+      value = argv.substr(split_pos + 1);
+    }
+
+    // special case for flag value enclosed in ""
+    if (value[0] == '"') {
+      value = value.substr(1);
+      if (value.back() == '"') {
+        value.pop_back();
+      } else {
+        while (i < argv_num) {
+          value += " ";
+          value += argvs[++i];
+          if (value.back() == '"') {
+            break;
+          }
+        }
+        if (value.back() == '"') {
+          value.pop_back();
+        } else {
+          LOG_FLAG_FATAL_ERROR("unexpected end of flag \"" + name +
+                               "\" value while looking for matching `\"'");
+        }
+      }
+    }
+
+    if (name == "fromenv" || name == "tryfromenv") {
+      // Value of --fromenv or --tryfromenv should be
+      // a comma separated list of env var names.
+      std::vector<std::string> env_flag_names;
+      for (size_t start_pos = 0, end_pos = 0;
+           start_pos < value.size() && end_pos != std::string::npos;
+           start_pos = end_pos + 1) {
+        end_pos = value.find(',', start_pos);
+        env_flag_names.push_back(value.substr(start_pos, end_pos - start_pos));
+      }
+      if (name == "fromenv") {
+        SetFlagsFromEnv(env_flag_names, true);
+      } else {
+        SetFlagsFromEnv(env_flag_names, false);
+      }
+      continue;
+    }
+
+    FlagRegistry::Instance()->SetFlagValue(name, value);
+  }
+
+  if (!allow_undefined_flags && !ErrorStream().str().empty()) {
+    exit_with_errors();
+  }
+}
+
+}  // namespace flags
+}  // namespace paddle
diff --git a/paddle/utils/flags_native.h b/paddle/utils/flags_native.h
new file mode 100644
index 00000000000..04814a4f679
--- /dev/null
+++ b/paddle/utils/flags_native.h
@@ -0,0 +1,131 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <string>
+#include <vector>
+
+// This is a simple commandline flags tool for paddle, which is inspired by
+// gflags but only implements the following necessary features:
+// 1. Define or declare a flag.
+// 2. Parse commandline flags.
+// 3. Other utility functions.
+
+namespace paddle {
+namespace flags {
+/**
+ * @brief Parse commandline flags.
+ *
+ * It receives the commandline arguments passed in argc and argv from the
+ * main function; argv[0] is the program name, and argv[1:] are the arguments
+ * matching the format "--name=value" or "--name value". After parsing, the
+ * corresponding flag values are updated.
+ */
+void ParseCommandLineFlags(int* argc, char*** argv);
+
+/**
+ * @brief Allow undefined flags in ParseCommandLineFlags()
+ */
+void AllowUndefinedFlags();
+
+/**
+ * @brief Set flags from environment variables.
+ *
+ * It receives a list of flag names, looks up the corresponding environment
+ * variables named "FLAGS_name", and, if found, assigns the environment
+ * variable values to the flags. If error_fatal is true, the program exits
+ * when an environment variable is not set or a flag is not defined, which is
+ * the same effect as the commandline argument
+ * "--fromenv=var_name1,var_name2,...". Otherwise, such errors are ignored,
+ * which is the same effect as "--tryfromenv=var_name1,var_name2,...".
+ */
+void SetFlagsFromEnv(const std::vector<std::string>& flags, bool error_fatal);
+
+/**
+ * @brief Set a single flag's value; returns true on success.
+ */
+bool SetFlagValue(const std::string& name, const std::string& value);
+
+/**
+ * @brief Find a flag by name; returns true if found.
+ */
+bool FindFlag(const std::string& name);
+
+/**
+ * @brief Print all registered flags' help messages. If to_file is true,
+ * the help is written to file_name instead of stdout.
+ */
+void PrintAllFlagHelp(bool to_file = false,
+                      const std::string& file_name = "all_flags.txt");
+}  // namespace flags
+}  // namespace paddle
+
+// ----------------------------DECLARE FLAGS----------------------------
+#define PD_DECLARE_VARIABLE(type, name) \
+  namespace paddle {                    \
+  namespace flags {                     \
+  extern type FLAGS_##name;             \
+  }                                     \
+  }                                     \
+  using paddle::flags::FLAGS_##name
+
+#define PD_DECLARE_bool(name) PD_DECLARE_VARIABLE(bool, name)
+#define PD_DECLARE_int32(name) PD_DECLARE_VARIABLE(int32_t, name)
+#define PD_DECLARE_uint32(name) PD_DECLARE_VARIABLE(uint32_t, name)
+#define PD_DECLARE_int64(name) PD_DECLARE_VARIABLE(int64_t, name)
+#define PD_DECLARE_uint64(name) PD_DECLARE_VARIABLE(uint64_t, name)
+#define PD_DECLARE_double(name) PD_DECLARE_VARIABLE(double, name)
+#define PD_DECLARE_string(name) PD_DECLARE_VARIABLE(std::string, name)
+
+namespace paddle {
+namespace flags {
+class FlagRegisterer {
+ public:
+  template <typename T>
+  FlagRegisterer(std::string name,
+                 std::string description,
+                 std::string file,
+                 const T* default_value,
+                 T* value);
+};
+}  // namespace flags
+}  // namespace paddle
+
+// ----------------------------DEFINE FLAGS----------------------------
+#define PD_DEFINE_VARIABLE(type, name, default_value, description)           \
+  namespace paddle {                                                         \
+  namespace flags {                                                          \
+  static const type FLAGS_##name##_default = default_value;                  \
+  type FLAGS_##name = default_value;                                         \
+  /* Register FLAG */                                                        \
+  static ::paddle::flags::FlagRegisterer flag_##name##_registerer(           \
+      #name, description, __FILE__, &FLAGS_##name##_default, &FLAGS_##name); \
+  }                                                                          \
+  }                                                                          \
+  using paddle::flags::FLAGS_##name
+
+#define PD_DEFINE_bool(name, val, txt) PD_DEFINE_VARIABLE(bool, name, val, txt)
+#define PD_DEFINE_int32(name, val, txt) \
+  PD_DEFINE_VARIABLE(int32_t, name, val, txt)
+#define PD_DEFINE_uint32(name, val, txt) \
+  PD_DEFINE_VARIABLE(uint32_t, name, val, txt)
+#define PD_DEFINE_int64(name, val, txt) \
+  PD_DEFINE_VARIABLE(int64_t, name, val, txt)
+#define PD_DEFINE_uint64(name, val, txt) \
+  PD_DEFINE_VARIABLE(uint64_t, name, val, txt)
+#define PD_DEFINE_double(name, val, txt) \
+  PD_DEFINE_VARIABLE(double, name, val, txt)
+#define PD_DEFINE_string(name, val, txt) \
+  PD_DEFINE_VARIABLE(std::string, name, val, txt)
diff --git a/paddle/utils/flags_native_test.cc b/paddle/utils/flags_native_test.cc
new file mode 100644
index 00000000000..2b26a8cd1e9
--- /dev/null
+++ b/paddle/utils/flags_native_test.cc
@@ -0,0 +1,104 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
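The environment-variable path declared above mirrors gflags' --fromenv/--tryfromenv: each listed flag is read from an environment variable named FLAGS_<name>. A small sketch against the native backend (the flag name is hypothetical, and setenv assumes a POSIX environment):

#include <cstdlib>

#include "paddle/utils/flags_native.h"

PD_DEFINE_double(learning_rate, 0.01, "Initial learning rate.");

int main() {
  setenv("FLAGS_learning_rate", "0.001", /*overwrite=*/1);
  // error_fatal=true matches --fromenv; false would match --tryfromenv.
  paddle::flags::SetFlagsFromEnv({"learning_rate"}, /*error_fatal=*/true);
  return FLAGS_learning_rate == 0.001 ? 0 : 1;
}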
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/utils/flags_native.h"
+
+#include <cstdlib>
+#include "gtest/gtest.h"
+
+PD_DEFINE_int32(paddle_test_int32, 1, "test int32 flag");
+PD_DEFINE_uint32(paddle_test_uint32, 2, "test uint32 flag");
+PD_DEFINE_string(paddle_test_string, "raw", "test string flag");
+
+using namespace paddle::flags;  // NOLINT
+
+void SplitCommandlineArg(const std::string& commandline,
+                         int* argc,
+                         char*** argv) {
+  static std::vector<std::string> args;
+  args.clear();
+  for (size_t start_pos = 0, end_pos = 0;
+       start_pos < commandline.size() && end_pos != std::string::npos;
+       start_pos = end_pos + 1) {
+    end_pos = commandline.find(' ', start_pos);
+    args.push_back(commandline.substr(start_pos, end_pos - start_pos));
+  }
+  *argc = args.size();
+  *argv = new char*[*argc];
+  for (size_t i = 0; i < args.size(); i++) {
+    (*argv)[i] = const_cast<char*>(args[i].c_str());
+  }
+}
+
+TEST(flags_native_test, ParseCommandLineFlags) {
+  uint32_t test_int32 = 2;
+  ASSERT_EQ(FLAGS_paddle_test_int32, 1);
+  ASSERT_EQ(FLAGS_paddle_test_uint32, test_int32);
+  ASSERT_EQ(FLAGS_paddle_test_string, "raw");
+
+  // Construct commandline arguments input
+  std::string commandline =
+      "test --paddle_test_int32=3 --paddle_test_uint32=\"4\" "
+      "--paddle_test_string \"modified string\"";
+  int argc;
+  char** argv;
+  SplitCommandlineArg(commandline, &argc, &argv);
+
+  // Parse commandline flags and check
+  ParseCommandLineFlags(&argc, &argv);
+  delete[] argv;  // allocated with new[] in SplitCommandlineArg
+
+  test_int32 = 4;
+  ASSERT_EQ(FLAGS_paddle_test_int32, 3);
+  ASSERT_EQ(FLAGS_paddle_test_uint32, test_int32);
+  ASSERT_EQ(FLAGS_paddle_test_string, "modified string");
+
+  // test FindFlag and SetFlagValue
+  ASSERT_TRUE(FindFlag("paddle_test_int32"));
+
+  SetFlagValue("paddle_test_int32", "9");
+  ASSERT_EQ(FLAGS_paddle_test_int32, 9);
+}
+
+#if defined(_POSIX_C_SOURCE) && \
+    _POSIX_C_SOURCE >= 200112L  // environment provides setenv
+bool SetEnvVar(const std::string& var_name, const std::string& var_value) {
+  int res = setenv(var_name.c_str(), var_value.c_str(), 1);
+  return res == 0;
+}
+
+PD_DEFINE_bool(paddle_test_env_bool, false, "test env bool flag");
+PD_DEFINE_double(paddle_test_env_double, 3.14, "test env double flag");
+
+TEST(flags_native_test, SetFlagsFromEnv) {
+  ASSERT_EQ(FLAGS_paddle_test_env_bool, false);
+  ASSERT_EQ(FLAGS_paddle_test_env_double, 3.14);
+
+  ASSERT_TRUE(SetEnvVar("FLAGS_paddle_test_env_bool", "true"));
+  ASSERT_TRUE(SetEnvVar("FLAGS_paddle_test_env_double", "2.71"));
+
+  std::string commandline =
+      "test --fromenv=paddle_test_env_bool,paddle_test_env_double";
+  int argc;
+  char** argv;
+  SplitCommandlineArg(commandline, &argc, &argv);
+  ParseCommandLineFlags(&argc, &argv);
+  delete[] argv;
+
+  ASSERT_EQ(FLAGS_paddle_test_env_bool, true);
+  ASSERT_EQ(FLAGS_paddle_test_env_double, 2.71);
+}
+#endif
+
+TEST(flags_native_test, PrintAllFlagHelp) { PrintAllFlagHelp(); }
diff --git a/paddle/utils/string/pretty_log.cc b/paddle/utils/string/pretty_log.cc
index 44bbbd0cc1b..bb84ad20184 100644
---
a/paddle/utils/string/pretty_log.cc +++ b/paddle/utils/string/pretty_log.cc @@ -14,9 +14,9 @@ #include "paddle/utils/string/pretty_log.h" -#include "gflags/gflags.h" +#include "paddle/utils/flags.h" -DEFINE_bool(color, true, "Whether to turn on pretty log"); // NOLINT +PD_DEFINE_bool(color, true, "Whether to turn on pretty log"); // NOLINT namespace paddle { namespace string {} // namespace string diff --git a/paddle/utils/string/pretty_log.h b/paddle/utils/string/pretty_log.h index 9de7ce24abd..546bf1eec7d 100644 --- a/paddle/utils/string/pretty_log.h +++ b/paddle/utils/string/pretty_log.h @@ -18,10 +18,10 @@ #include #include -#include "gflags/gflags.h" +#include "paddle/utils/flags.h" #include "paddle/utils/string/printf.h" -DECLARE_bool(color); +PD_DECLARE_bool(color); namespace paddle { diff --git a/test/cpp/fluid/benchmark/op_tester.cc b/test/cpp/fluid/benchmark/op_tester.cc index 0ab90f03999..6f68ab23a45 100644 --- a/test/cpp/fluid/benchmark/op_tester.cc +++ b/test/cpp/fluid/benchmark/op_tester.cc @@ -16,7 +16,6 @@ limitations under the License. */ #include -#include "gflags/gflags.h" #include "gtest/gtest.h" #include "paddle/fluid/framework/op_info.h" #include "paddle/fluid/framework/op_registry.h" @@ -25,6 +24,7 @@ limitations under the License. */ #include "paddle/fluid/platform/profiler.h" #include "paddle/fluid/platform/timer.h" #include "paddle/fluid/pybind/pybind.h" +#include "paddle/utils/flags.h" // phi #include "paddle/phi/kernels/declarations.h" @@ -33,8 +33,8 @@ namespace paddle { namespace operators { namespace benchmark { -DEFINE_string(op_config_list, "", "Path of op config file."); // NOLINT -DEFINE_int32(specified_config_id, -1, "Test the specified op config."); +PD_DEFINE_string(op_config_list, "", "Path of op config file."); // NOLINT +PD_DEFINE_int32(specified_config_id, -1, "Test the specified op config."); void OpTester::Init(const std::string &filename) { Init(OpTesterConfig(filename)); @@ -57,13 +57,13 @@ void OpTester::Init(const OpTesterConfig &config) { } if (config_.device_id >= 0) { - place_ = paddle::platform::CUDAPlace(config_.device_id); + place_ = ::paddle::platform::CUDAPlace(config_.device_id); } else { - place_ = paddle::platform::CPUPlace(); + place_ = ::paddle::platform::CPUPlace(); } framework::InitDevices(); - scope_ = std::make_unique(); + scope_ = std::make_unique<::paddle::framework::Scope>(); op_ = framework::OpRegistry::CreateOp(op_desc_); CreateVariables(scope_.get()); @@ -318,7 +318,7 @@ void OpTester::SetupTensor(phi::DenseTensor *tensor, } if (!platform::is_cpu_place(place_)) { - paddle::framework::TensorCopySync(cpu_tensor, place_, tensor); + ::paddle::framework::TensorCopySync(cpu_tensor, place_, tensor); } } diff --git a/test/cpp/fluid/mkldnn/test_mkldnn_cpu_quantize_pass.cc b/test/cpp/fluid/mkldnn/test_mkldnn_cpu_quantize_pass.cc index 6f1ac7e56f3..d6ba2f7a0c2 100644 --- a/test/cpp/fluid/mkldnn/test_mkldnn_cpu_quantize_pass.cc +++ b/test/cpp/fluid/mkldnn/test_mkldnn_cpu_quantize_pass.cc @@ -29,7 +29,7 @@ using std::pair; using std::string; using std::unordered_map; -DEFINE_bool(enable_mkldnn, true, "Enable MKLDNN"); +PD_DEFINE_bool(enable_mkldnn, true, "Enable MKLDNN"); namespace paddle { namespace pass { diff --git a/test/cpp/fluid/pscore/CMakeLists.txt b/test/cpp/fluid/pscore/CMakeLists.txt index c19df6b4696..07d3efaa311 100644 --- a/test/cpp/fluid/pscore/CMakeLists.txt +++ b/test/cpp/fluid/pscore/CMakeLists.txt @@ -18,7 +18,7 @@ if(WITH_ARM_BRPC) framework_proto sendrecv_rpc arm_brpc - gflags + ${flags_dep} glog snappy 
device_context) @@ -35,15 +35,9 @@ else() ps_framework_proto framework_proto sendrecv_rpc - brpc - leveldb - ssl - crypto - protobuf - gflags - glog + ${EXTERNAL_BRPC_DEPS} + ${flags_dep} zlib - snappy device_context) endif() diff --git a/test/cpp/fluid/pscore/switch_server_test.cc b/test/cpp/fluid/pscore/switch_server_test.cc index a5e6fff4804..5ea2e28d454 100644 --- a/test/cpp/fluid/pscore/switch_server_test.cc +++ b/test/cpp/fluid/pscore/switch_server_test.cc @@ -21,18 +21,18 @@ limitations under the License. */ #include #include // NOLINT -#include "gflags/gflags.h" #include "gtest/gtest.h" #include "paddle/fluid/distributed/ps/service/heter_client.h" #include "paddle/fluid/distributed/ps/service/heter_server.h" +#include "paddle/utils/flags.h" namespace framework = paddle::framework; namespace platform = paddle::platform; namespace distributed = paddle::distributed; -DEFINE_string(switch_addr_inner, "127.0.0.1:6000", "addr of inner cluster"); -DEFINE_string(switch_addr_heter, "127.0.0.1:6100", "add of inter cluster"); -DEFINE_string(peer_switch_addr, "127.0.0.1:7100", "add of inter cluster"); +PD_DEFINE_string(switch_addr_inner, "127.0.0.1:6000", "addr of inner cluster"); +PD_DEFINE_string(switch_addr_heter, "127.0.0.1:6100", "add of inter cluster"); +PD_DEFINE_string(peer_switch_addr, "127.0.0.1:7100", "add of inter cluster"); void StartSwitchServer( std::shared_ptr& switch_server_ptr, // NOLINT @@ -61,7 +61,7 @@ int main(int argc, char* argv[]) { framework::ProgramDesc program; exe.Prepare(program, 0); // solve undefined symbol: tensor_table.cc - google::ParseCommandLineFlags(&argc, &argv, true); + paddle::flags::ParseCommandLineFlags(&argc, &argv); std::string switch_a_endpoint(FLAGS_switch_addr_inner); std::string switch_a_endpoint_inter(FLAGS_switch_addr_heter); diff --git a/test/cpp/inference/api/analysis_predictor_tester.cc b/test/cpp/inference/api/analysis_predictor_tester.cc index 2f794a4c784..35c07c3a837 100644 --- a/test/cpp/inference/api/analysis_predictor_tester.cc +++ b/test/cpp/inference/api/analysis_predictor_tester.cc @@ -31,7 +31,7 @@ #include "paddle/phi/backends/cpu/cpu_info.h" #include "test/cpp/inference/api/tester_helper.h" -DEFINE_string(dirname, "", "dirname to tests."); +PD_DEFINE_string(dirname, "", "dirname to tests."); namespace paddle { diff --git a/test/cpp/inference/api/analyzer_bfloat16_image_classification_tester.cc b/test/cpp/inference/api/analyzer_bfloat16_image_classification_tester.cc index f6d4d8e8f76..fe7d2a3a6f6 100644 --- a/test/cpp/inference/api/analyzer_bfloat16_image_classification_tester.cc +++ b/test/cpp/inference/api/analyzer_bfloat16_image_classification_tester.cc @@ -16,7 +16,7 @@ limitations under the License. 
*/ #include "paddle/phi/backends/cpu/cpu_info.h" #include "test/cpp/inference/api/tester_helper.h" -DEFINE_bool(enable_mkldnn, true, "Enable MKLDNN"); +PD_DEFINE_bool(enable_mkldnn, true, "Enable MKLDNN"); namespace paddle { namespace inference { diff --git a/test/cpp/inference/api/analyzer_dam_tester.cc b/test/cpp/inference/api/analyzer_dam_tester.cc index 21276c71f63..d17f8670adc 100644 --- a/test/cpp/inference/api/analyzer_dam_tester.cc +++ b/test/cpp/inference/api/analyzer_dam_tester.cc @@ -130,7 +130,7 @@ void PrepareInputs(std::vector *input_slots, auto one_batch = data->NextBatch(); PADDLE_ENFORCE( !one_batch.response.empty(), - paddle::platform::errors::Fatal("The response of one batch is empty.")); + ::paddle::platform::errors::Fatal("The response of one batch is empty.")); int size = one_batch.response[0].size(); CHECK_EQ(size, kMaxTurnLen); // turn tensor assignment @@ -228,17 +228,17 @@ void profile(bool use_mkldnn = false) { if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) { PADDLE_ENFORCE_GT(outputs.size(), 0, - paddle::platform::errors::Fatal( + ::paddle::platform::errors::Fatal( "The size of outputs should be greater than 0.")); auto output = outputs.back(); PADDLE_ENFORCE_GT(output.size(), 0, - paddle::platform::errors::Fatal( + ::paddle::platform::errors::Fatal( "The size of output should be greater than 0.")); size_t size = GetSize(output[0]); PADDLE_ENFORCE_GT(size, 0, - paddle::platform::errors::Fatal( + ::paddle::platform::errors::Fatal( "The size of output should be greater than 0.")); float *result = static_cast(output[0].data.data()); for (size_t i = 0; i < size; i++) { diff --git a/test/cpp/inference/api/analyzer_detect_functional_mkldnn_tester.cc b/test/cpp/inference/api/analyzer_detect_functional_mkldnn_tester.cc index 389e0c9648d..8d5627fce3c 100644 --- a/test/cpp/inference/api/analyzer_detect_functional_mkldnn_tester.cc +++ b/test/cpp/inference/api/analyzer_detect_functional_mkldnn_tester.cc @@ -20,8 +20,8 @@ limitations under the License. */ #include "paddle/phi/common/place.h" #include "test/cpp/inference/api/tester_helper.h" -DEFINE_string(infer_shape, "", "data shape file"); -DEFINE_int32(sample, 20, "number of sample"); +PD_DEFINE_string(infer_shape, "", "data shape file"); +PD_DEFINE_int32(sample, 20, "number of sample"); namespace paddle { namespace inference { diff --git a/test/cpp/inference/api/analyzer_detect_tester.cc b/test/cpp/inference/api/analyzer_detect_tester.cc index 62b97d635b5..72e498521cd 100644 --- a/test/cpp/inference/api/analyzer_detect_tester.cc +++ b/test/cpp/inference/api/analyzer_detect_tester.cc @@ -19,8 +19,8 @@ limitations under the License. */ #include "test/cpp/inference/api/tester_helper.h" -DEFINE_string(infer_shape, "", "data shape file"); -DEFINE_int32(sample, 20, "number of sample"); +PD_DEFINE_string(infer_shape, "", "data shape file"); +PD_DEFINE_int32(sample, 20, "number of sample"); namespace paddle { namespace inference { diff --git a/test/cpp/inference/api/analyzer_image_classification_tester.cc b/test/cpp/inference/api/analyzer_image_classification_tester.cc index 93b8f92c797..1df6d448861 100644 --- a/test/cpp/inference/api/analyzer_image_classification_tester.cc +++ b/test/cpp/inference/api/analyzer_image_classification_tester.cc @@ -17,7 +17,7 @@ limitations under the License. 
*/ #include "test/cpp/inference/api/tester_helper.h" -DEFINE_bool(disable_mkldnn_fc, false, "Disable usage of MKL-DNN's FC op"); +PD_DEFINE_bool(disable_mkldnn_fc, false, "Disable usage of MKL-DNN's FC op"); namespace paddle { namespace inference { diff --git a/test/cpp/inference/api/analyzer_int8_image_classification_tester.cc b/test/cpp/inference/api/analyzer_int8_image_classification_tester.cc index 57552ccb82e..77c12dcfe0f 100644 --- a/test/cpp/inference/api/analyzer_int8_image_classification_tester.cc +++ b/test/cpp/inference/api/analyzer_int8_image_classification_tester.cc @@ -18,7 +18,7 @@ limitations under the License. */ #include "paddle/fluid/inference/api/paddle_analysis_config.h" #include "test/cpp/inference/api/tester_helper.h" -DEFINE_bool(enable_mkldnn, true, "Enable MKLDNN"); +PD_DEFINE_bool(enable_mkldnn, true, "Enable MKLDNN"); namespace paddle { namespace inference { @@ -53,7 +53,7 @@ TEST(Analyzer_int8_image_classification, quantization) { // prepare warmup batch from input data read earlier // warmup batch size can be different than batch size std::shared_ptr> warmup_data = - paddle::inference::GetWarmupData(input_slots_all); + ::paddle::inference::GetWarmupData(input_slots_all); // INT8 implies FC oneDNN passes to be used q_cfg.pass_builder()->AppendPass("fc_mkldnn_pass"); diff --git a/test/cpp/inference/api/analyzer_int8_object_detection_tester.cc b/test/cpp/inference/api/analyzer_int8_object_detection_tester.cc index a3b13dd691c..311fb0946ca 100644 --- a/test/cpp/inference/api/analyzer_int8_object_detection_tester.cc +++ b/test/cpp/inference/api/analyzer_int8_object_detection_tester.cc @@ -18,7 +18,7 @@ limitations under the License. */ #include "paddle/fluid/inference/api/paddle_analysis_config.h" #include "test/cpp/inference/api/tester_helper.h" -DEFINE_bool(enable_mkldnn, true, "Enable MKLDNN"); +PD_DEFINE_bool(enable_mkldnn, true, "Enable MKLDNN"); // setting iterations to 0 means processing the whole dataset namespace paddle { @@ -153,7 +153,7 @@ std::shared_ptr> GetWarmupData( PADDLE_ENFORCE_LE( static_cast(num_images), iterations * test_data_batch_size, - paddle::platform::errors::Fatal( + ::paddle::platform::errors::Fatal( "The requested quantization warmup data size " + std::to_string(num_images) + " is bigger than all test data size.")); @@ -247,9 +247,9 @@ std::shared_ptr> GetWarmupData( PADDLE_ENFORCE_EQ( static_cast(num_objects), static_cast(objects_accum), - paddle::platform::errors::Fatal("The requested num of objects " + - std::to_string(num_objects) + - " is the same as objects_accum.")); + ::paddle::platform::errors::Fatal("The requested num of objects " + + std::to_string(num_objects) + + " is not the same as objects_accum.")); auto warmup_data = std::make_shared>(4); (*warmup_data)[0] = std::move(images); diff --git a/test/cpp/inference/api/analyzer_lac_tester.cc b/test/cpp/inference/api/analyzer_lac_tester.cc index fb82bbc3b2d..9bdb819e5fb 100644 --- a/test/cpp/inference/api/analyzer_lac_tester.cc +++ b/test/cpp/inference/api/analyzer_lac_tester.cc @@ -99,10 +99,10 @@ void GetOneBatch(std::vector *input_slots, input_tensor.name = "word"; input_tensor.dtype = PaddleDType::INT64; TensorAssignData(&input_tensor, {one_batch.data}, one_batch.lod); - PADDLE_ENFORCE_EQ( - batch_size, - static_cast(one_batch.lod.size() - 1), - paddle::platform::errors::Fatal("The lod size of one batch is invaild.")); + PADDLE_ENFORCE_EQ(batch_size, + static_cast(one_batch.lod.size() - 1), + ::paddle::platform::errors::Fatal( + "The lod size of one batch is invalid."));
input_slots->assign({input_tensor}); } @@ -145,19 +145,19 @@ TEST(Analyzer_LAC, profile) { 15, 44, 38, 39, 14, 15, 44, 22, 23, 23, 23, 23, 23, 23, 23}; PADDLE_ENFORCE_GT(outputs.size(), 0, - paddle::platform::errors::Fatal( + ::paddle::platform::errors::Fatal( "The size of output should be greater than 0.")); auto output = outputs.back(); PADDLE_ENFORCE_EQ(output.size(), 1UL, - paddle::platform::errors::Fatal( + ::paddle::platform::errors::Fatal( "The size of output should be equal to 1.")); size_t size = GetSize(output[0]); size_t batch1_size = sizeof(lac_ref_data) / sizeof(int64_t); PADDLE_ENFORCE_GE( size, batch1_size, - paddle::platform::errors::Fatal("The size of batch is invaild.")); + ::paddle::platform::errors::Fatal("The size of batch is invalid.")); int64_t *pdata = static_cast<int64_t *>(output[0].data.data()); for (size_t i = 0; i < batch1_size; ++i) { EXPECT_EQ(pdata[i], lac_ref_data[i]); diff --git a/test/cpp/inference/api/analyzer_mmp_tester.cc b/test/cpp/inference/api/analyzer_mmp_tester.cc index a432d5c10b2..92345fc8950 100644 --- a/test/cpp/inference/api/analyzer_mmp_tester.cc +++ b/test/cpp/inference/api/analyzer_mmp_tester.cc @@ -18,8 +18,8 @@ #include "test/cpp/inference/api/tester_helper.h" // Here add missing commands -DEFINE_string(infer_model2, "", "model path"); -DEFINE_string(infer_model3, "", "model path"); +PD_DEFINE_string(infer_model2, "", "model path"); +PD_DEFINE_string(infer_model3, "", "model path"); namespace paddle { namespace inference { @@ -100,12 +100,12 @@ void compare(bool use_mkldnn = false) { xx2_output.begin(), [](const float& l, const float& r) { return fabs(l - r) < 1e-4; }); - PADDLE_ENFORCE_EQ( - result, - true, - paddle::platform::errors::Fatal("Results of model run independently " - "differs from results of the same model " - "run as a sequence of models")); + PADDLE_ENFORCE_EQ(result, + true, + ::paddle::platform::errors::Fatal( + "Results of model run independently " + "differ from results of the same model " + "run as a sequence of models")); } TEST(Analyzer_mmp, compare) { compare(); } diff --git a/test/cpp/inference/api/analyzer_quant_image_classification_tester.cc b/test/cpp/inference/api/analyzer_quant_image_classification_tester.cc index 69b627275cd..e9b841ec772 100644 --- a/test/cpp/inference/api/analyzer_quant_image_classification_tester.cc +++ b/test/cpp/inference/api/analyzer_quant_image_classification_tester.cc @@ -18,7 +18,7 @@ limitations under the License.
*/ #include "paddle/fluid/inference/api/paddle_analysis_config.h" #include "test/cpp/inference/api/tester_helper.h" -DEFINE_bool(enable_mkldnn, true, "Enable MKLDNN"); +PD_DEFINE_bool(enable_mkldnn, true, "Enable MKLDNN"); namespace paddle { namespace inference { diff --git a/test/cpp/inference/api/analyzer_rnn1_tester.cc b/test/cpp/inference/api/analyzer_rnn1_tester.cc index 7f0f11f5515..c5c7df887dd 100644 --- a/test/cpp/inference/api/analyzer_rnn1_tester.cc +++ b/test/cpp/inference/api/analyzer_rnn1_tester.cc @@ -14,7 +14,7 @@ #include "test/cpp/inference/api/tester_helper.h" -DEFINE_bool(with_precision_check, true, "turn on test"); +PD_DEFINE_bool(with_precision_check, true, "turn on test"); namespace paddle { namespace inference { diff --git a/test/cpp/inference/api/analyzer_seq_pool1_tester_helper.h b/test/cpp/inference/api/analyzer_seq_pool1_tester_helper.h index 3c0ad4b5f82..0d75eacbbdf 100644 --- a/test/cpp/inference/api/analyzer_seq_pool1_tester_helper.h +++ b/test/cpp/inference/api/analyzer_seq_pool1_tester_helper.h @@ -65,7 +65,7 @@ struct DataRecord { PADDLE_ENFORCE_EQ( slot_data.size() % 11, 0UL, - paddle::platform::errors::Fatal( + ::paddle::platform::errors::Fatal( "line %d, %s should be divisible", num_lines, name)); datasets[name].emplace_back(std::move(slot_data)); } @@ -73,19 +73,19 @@ struct DataRecord { PADDLE_ENFORCE_EQ( num_samples * num_slots, static_cast(num_lines), - paddle::platform::errors::Fatal("num samples should be divisible")); + ::paddle::platform::errors::Fatal("num samples should be divisible")); PADDLE_ENFORCE_GT(num_samples, 0UL, - paddle::platform::errors::Fatal( + ::paddle::platform::errors::Fatal( "The num of samples should be greater than 0.")); } void Prepare(int bs) { for (auto it = datasets.begin(); it != datasets.end(); ++it) { - PADDLE_ENFORCE_EQ( - it->second.size(), - num_samples, - paddle::platform::errors::Fatal("size of each slot should be equal")); + PADDLE_ENFORCE_EQ(it->second.size(), + num_samples, + ::paddle::platform::errors::Fatal( + "size of each slot should be equal")); } size_t num_batches = num_samples / bs; EXPECT_GT(num_batches, 0UL); @@ -110,7 +110,7 @@ struct DataRecord { PADDLE_ENFORCE_EQ( len * 11, datas[id].size(), - paddle::platform::errors::Fatal( + ::paddle::platform::errors::Fatal( "%s %d size should be divisible", slot.name, id)); lod[k + 1] = lod[k] + len; } diff --git a/test/cpp/inference/api/analyzer_vis_tester.cc b/test/cpp/inference/api/analyzer_vis_tester.cc index cf79d26847f..d8b15393ad6 100644 --- a/test/cpp/inference/api/analyzer_vis_tester.cc +++ b/test/cpp/inference/api/analyzer_vis_tester.cc @@ -64,7 +64,7 @@ void SetInput(std::vector> *inputs) { PADDLE_ENFORCE_EQ( FLAGS_test_all_data, 0, - paddle::platform::errors::Fatal("Only have single batch of data.")); + ::paddle::platform::errors::Fatal("Only have single batch of data.")); std::string line; std::ifstream file(FLAGS_infer_data); std::getline(file, line); @@ -107,7 +107,7 @@ void profile(bool use_mkldnn = false) { PADDLE_ENFORCE_GT(outputs.size(), 0, - paddle::platform::errors::Fatal( + ::paddle::platform::errors::Fatal( "The size of output should be greater than 0.")); auto &output = outputs.back().front(); size_t numel = output.data.length() / PaddleDtypeSize(output.dtype); diff --git a/test/cpp/inference/api/api_impl_tester.cc b/test/cpp/inference/api/api_impl_tester.cc index 934ff065350..78e908189cc 100644 --- a/test/cpp/inference/api/api_impl_tester.cc +++ b/test/cpp/inference/api/api_impl_tester.cc @@ -17,9 +17,9 @@ limitations under the 
License. */ #include // NOLINT -#include "gflags/gflags.h" #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/inference/api/api_impl.h" +#include "paddle/utils/flags.h" #include "test/cpp/inference/test_helper.h" #ifdef __clang__ @@ -28,10 +28,10 @@ limitations under the License. */ #define ACC_DIFF 1e-3 #endif -DEFINE_string(word2vec_dirname, - "", - "Directory of the word2vec inference model."); -DEFINE_string(book_dirname, "", "Directory of the book inference model."); +PD_DEFINE_string(word2vec_dirname, + "", + "Directory of the word2vec inference model."); +PD_DEFINE_string(book_dirname, "", "Directory of the book inference model."); namespace paddle { @@ -67,11 +67,11 @@ NativeConfig GetConfig() { return config; } -void MainWord2Vec(const paddle::PaddlePlace& place) { +void MainWord2Vec(const ::paddle::PaddlePlace& place) { NativeConfig config = GetConfig(); auto predictor = CreatePaddlePredictor(config); - config.use_gpu = paddle::gpu_place_used(place); - config.use_xpu = paddle::xpu_place_used(place); + config.use_gpu = ::paddle::gpu_place_used(place); + config.use_xpu = ::paddle::xpu_place_used(place); phi::DenseTensor first_word, second_word, third_word, fourth_word; framework::LoD lod{{0, 1}}; @@ -105,7 +105,7 @@ void MainWord2Vec(const paddle::PaddlePlace& place) { cpu_feeds.push_back(&fourth_word); framework::FetchType output1; - std::vector<paddle::framework::FetchType*> cpu_fetchs1; + std::vector<::paddle::framework::FetchType*> cpu_fetchs1; cpu_fetchs1.push_back(&output1); TestInference(config.model_dir, cpu_feeds, cpu_fetchs1); @@ -118,12 +118,12 @@ void MainWord2Vec(const paddle::PaddlePlace& place) { } } -void MainImageClassification(const paddle::PaddlePlace& place) { +void MainImageClassification(const ::paddle::PaddlePlace& place) { int batch_size = 2; bool repeat = false; NativeConfig config = GetConfig(); - config.use_gpu = paddle::gpu_place_used(place); - config.use_xpu = paddle::xpu_place_used(place); + config.use_gpu = ::paddle::gpu_place_used(place); + config.use_xpu = ::paddle::xpu_place_used(place); config.model_dir = FLAGS_book_dirname + "/image_classification_resnet.inference.model"; @@ -163,10 +163,10 @@ void MainImageClassification(const paddle::PaddlePlace& place) { } } -void MainThreadsWord2Vec(const paddle::PaddlePlace& place) { +void MainThreadsWord2Vec(const ::paddle::PaddlePlace& place) { NativeConfig config = GetConfig(); - config.use_gpu = paddle::gpu_place_used(place); - config.use_xpu = paddle::xpu_place_used(place); + config.use_gpu = ::paddle::gpu_place_used(place); + config.use_xpu = ::paddle::xpu_place_used(place); auto main_predictor = CreatePaddlePredictor(config); // prepare inputs data and reference results @@ -186,7 +186,7 @@ void MainThreadsWord2Vec(const paddle::PaddlePlace& place) { // get reference result of each job std::vector ref_feeds; - std::vector<paddle::framework::FetchType*> ref_fetches(1, &refs[i]); + std::vector<::paddle::framework::FetchType*> ref_fetches(1, &refs[i]); for (auto& word : jobs[i]) { ref_feeds.push_back(&word); } @@ -225,12 +225,12 @@ void MainThreadsWord2Vec(const paddle::PaddlePlace& place) { } } -void MainThreadsImageClassification(const paddle::PaddlePlace& place) { +void MainThreadsImageClassification(const ::paddle::PaddlePlace& place) { constexpr int num_jobs = 4; // each job run 1 batch constexpr int batch_size = 1; NativeConfig config = GetConfig(); - config.use_gpu = paddle::gpu_place_used(place); - config.use_xpu = paddle::xpu_place_used(place); + config.use_gpu = ::paddle::gpu_place_used(place); + config.use_xpu =
::paddle::xpu_place_used(place); config.model_dir = FLAGS_book_dirname + "/image_classification_resnet.inference.model"; @@ -280,53 +280,53 @@ void MainThreadsImageClassification(const paddle::PaddlePlace& place) { } TEST(inference_api_native, word2vec_cpu) { - MainWord2Vec(paddle::PaddlePlace::kCPU); + MainWord2Vec(::paddle::PaddlePlace::kCPU); } TEST(inference_api_native, word2vec_cpu_threads) { - MainThreadsWord2Vec(paddle::PaddlePlace::kCPU); + MainThreadsWord2Vec(::paddle::PaddlePlace::kCPU); } TEST(inference_api_native, image_classification_cpu) { - MainImageClassification(paddle::PaddlePlace::kCPU); + MainImageClassification(::paddle::PaddlePlace::kCPU); } TEST(inference_api_native, image_classification_cpu_threads) { - MainThreadsImageClassification(paddle::PaddlePlace::kCPU); + MainThreadsImageClassification(::paddle::PaddlePlace::kCPU); } #ifdef PADDLE_WITH_XPU TEST(inference_api_native, word2vec_xpu) { - MainWord2Vec(paddle::PaddlePlace::kXPU); + MainWord2Vec(::paddle::PaddlePlace::kXPU); } TEST(inference_api_native, image_classification_xpu) { - MainImageClassification(paddle::PaddlePlace::kXPU); + MainImageClassification(::paddle::PaddlePlace::kXPU); } #endif #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) TEST(inference_api_native, word2vec_gpu) { - MainWord2Vec(paddle::PaddlePlace::kGPU); + MainWord2Vec(::paddle::PaddlePlace::kGPU); } // Turn off temporarily for the unstable result. // TEST(inference_api_native, word2vec_gpu_threads) { -// MainThreadsWord2Vec(paddle::PaddlePlace::kGPU); +// MainThreadsWord2Vec(::paddle::PaddlePlace::kGPU); // } TEST(inference_api_native, image_classification_gpu) { - MainImageClassification(paddle::PaddlePlace::kGPU); + MainImageClassification(::paddle::PaddlePlace::kGPU); } // Turn off temporarily for the unstable result. // TEST(inference_api_native, image_classification_gpu_threads) { -// MainThreadsImageClassification(paddle::PaddlePlace::kGPU); +// MainThreadsImageClassification(::paddle::PaddlePlace::kGPU); // } #endif #ifdef PADDLE_WITH_DNNL TEST(inference_api_native, image_classification_cpu_onednn) { FLAGS_use_mkldnn = true; - MainImageClassification(paddle::PaddlePlace::kCPU); + MainImageClassification(::paddle::PaddlePlace::kCPU); } TEST(inference_api_native, word2vec_cpu_onednn) { FLAGS_use_mkldnn = true; - MainWord2Vec(paddle::PaddlePlace::kCPU); + MainWord2Vec(::paddle::PaddlePlace::kCPU); } #endif diff --git a/test/cpp/inference/api/ipu_multi_model_profile.cc b/test/cpp/inference/api/ipu_multi_model_profile.cc index 3c5b1af1594..d5b8ce25322 100644 --- a/test/cpp/inference/api/ipu_multi_model_profile.cc +++ b/test/cpp/inference/api/ipu_multi_model_profile.cc @@ -15,7 +15,7 @@ limitations under the License. */ #include #include -#include "gflags/gflags.h" +#include "paddle/utils/flags.h" #include "test/cpp/inference/api/tester_helper.h" namespace paddle { diff --git a/test/cpp/inference/api/ipu_resnet50_fp16_test.cc b/test/cpp/inference/api/ipu_resnet50_fp16_test.cc index 99f0d58926d..1e3ddb51cb8 100644 --- a/test/cpp/inference/api/ipu_resnet50_fp16_test.cc +++ b/test/cpp/inference/api/ipu_resnet50_fp16_test.cc @@ -17,7 +17,7 @@ limitations under the License. 
*/ #include -#include "gflags/gflags.h" +#include "paddle/utils/flags.h" #include "test/cpp/inference/api/tester_helper.h" namespace paddle { diff --git a/test/cpp/inference/api/ipu_resnet50_test.cc b/test/cpp/inference/api/ipu_resnet50_test.cc index 5a414bf9415..d45cd9bf49a 100644 --- a/test/cpp/inference/api/ipu_resnet50_test.cc +++ b/test/cpp/inference/api/ipu_resnet50_test.cc @@ -14,7 +14,7 @@ limitations under the License. */ #include -#include "gflags/gflags.h" +#include "paddle/utils/flags.h" #include "test/cpp/inference/api/tester_helper.h" namespace paddle { diff --git a/test/cpp/inference/api/ipu_word2vec_sample.cc b/test/cpp/inference/api/ipu_word2vec_sample.cc index ba8f28ee5e1..e43d03c5108 100644 --- a/test/cpp/inference/api/ipu_word2vec_sample.cc +++ b/test/cpp/inference/api/ipu_word2vec_sample.cc @@ -24,11 +24,11 @@ limitations under the License. */ #include #include -#include "gflags/gflags.h" #include "glog/logging.h" #include "paddle/fluid/inference/api/paddle_inference_api.h" +#include "paddle/utils/flags.h" -DEFINE_string(infer_model, "", "Directory of the inference model."); +PD_DEFINE_string(infer_model, "", "Directory of the inference model."); using paddle_infer::Config; using paddle_infer::CreatePredictor; @@ -70,7 +70,7 @@ void inference(std::string model_path, } int main(int argc, char *argv[]) { - ::GFLAGS_NAMESPACE::ParseCommandLineFlags(&argc, &argv, true); + ::paddle::flags::ParseCommandLineFlags(&argc, &argv); std::vector ipu_result; std::vector cpu_result; inference(FLAGS_infer_model, true, &ipu_result); diff --git a/test/cpp/inference/api/lite_mul_model_test.cc b/test/cpp/inference/api/lite_mul_model_test.cc index e600a3bab91..3fa8e545a57 100644 --- a/test/cpp/inference/api/lite_mul_model_test.cc +++ b/test/cpp/inference/api/lite_mul_model_test.cc @@ -19,7 +19,7 @@ limitations under the License. */ #include // NOLINT #include // NOLINT -#include "gflags/gflags.h" +#include "paddle/utils/flags.h" #include "test/cpp/inference/api/tester_helper.h" namespace paddle { diff --git a/test/cpp/inference/api/lite_resnet50_test.cc b/test/cpp/inference/api/lite_resnet50_test.cc index e35e2838823..dce9a8932fe 100644 --- a/test/cpp/inference/api/lite_resnet50_test.cc +++ b/test/cpp/inference/api/lite_resnet50_test.cc @@ -17,7 +17,7 @@ limitations under the License. */ #include -#include "gflags/gflags.h" +#include "paddle/utils/flags.h" #include "test/cpp/inference/api/tester_helper.h" namespace paddle { diff --git a/test/cpp/inference/api/mkldnn_quantizer_tester.cc b/test/cpp/inference/api/mkldnn_quantizer_tester.cc index 5e699a8b4c6..8edad9fe271 100644 --- a/test/cpp/inference/api/mkldnn_quantizer_tester.cc +++ b/test/cpp/inference/api/mkldnn_quantizer_tester.cc @@ -19,7 +19,7 @@ #include "paddle/fluid/inference/api/mkldnn_quantizer.h" #include "paddle/fluid/inference/api/paddle_inference_api.h" -DEFINE_string(dirname, "", "dirname to tests."); +PD_DEFINE_string(dirname, "", "dirname to tests."); namespace paddle { diff --git a/test/cpp/inference/api/paddle_infer_api_copy_tensor_tester.cc b/test/cpp/inference/api/paddle_infer_api_copy_tensor_tester.cc index 4674b77091a..56b1b5b5d7f 100644 --- a/test/cpp/inference/api/paddle_infer_api_copy_tensor_tester.cc +++ b/test/cpp/inference/api/paddle_infer_api_copy_tensor_tester.cc @@ -19,10 +19,10 @@ limitations under the License. 
*/ #include #include -#include "gflags/gflags.h" #include "glog/logging.h" #include "paddle/fluid/inference/api/paddle_infer_contrib.h" #include "paddle/fluid/platform/float16.h" +#include "paddle/utils/flags.h" #include "test/cpp/inference/api/trt_test_helper.h" namespace paddle_infer { diff --git a/test/cpp/inference/api/paddle_infer_api_errors_tester.cc b/test/cpp/inference/api/paddle_infer_api_errors_tester.cc index c716115ce2a..4fc1f6f0843 100644 --- a/test/cpp/inference/api/paddle_infer_api_errors_tester.cc +++ b/test/cpp/inference/api/paddle_infer_api_errors_tester.cc @@ -12,11 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "gflags/gflags.h" #include "glog/logging.h" #include "gtest/gtest.h" #include "paddle/fluid/inference/api/paddle_infer_contrib.h" #include "paddle/fluid/platform/enforce.h" +#include "paddle/utils/flags.h" namespace paddle_infer { namespace contrib { diff --git a/test/cpp/inference/api/paddle_infer_api_test.cc b/test/cpp/inference/api/paddle_infer_api_test.cc index c59ac40e5e5..e53473a520d 100644 --- a/test/cpp/inference/api/paddle_infer_api_test.cc +++ b/test/cpp/inference/api/paddle_infer_api_test.cc @@ -15,7 +15,7 @@ limitations under the License. */ #include #include -#include "gflags/gflags.h" +#include "paddle/utils/flags.h" #include "test/cpp/inference/api/tester_helper.h" namespace paddle_infer { diff --git a/test/cpp/inference/api/tester_helper.h b/test/cpp/inference/api/tester_helper.h index fe016abdaee..a204c31cfd3 100644 --- a/test/cpp/inference/api/tester_helper.h +++ b/test/cpp/inference/api/tester_helper.h @@ -40,83 +40,87 @@ #include "test/cpp/inference/api/config_printer.h" #include "test/cpp/inference/test_helper.h" -DEFINE_string(model_name, "", "model name"); -DEFINE_string(infer_model, "", "model path"); -DEFINE_string(fp32_model, "", "FP32 model path"); -DEFINE_string(int8_model, "", "INT8 model path"); -DEFINE_string(infer_data, "", "data file"); -DEFINE_string(refer_result, "", "reference result for comparison"); -DEFINE_int32(batch_size, 1, "batch size"); -DEFINE_bool(ernie_large, false, "Test ernie large"); -DEFINE_bool(with_accuracy_layer, - true, - "Calculate the accuracy while label is in the input"); -DEFINE_bool(enable_fp32, true, "Enable FP32 type prediction"); -DEFINE_bool(enable_bf16, false, "Enable BF16 type prediction"); -DEFINE_bool(enable_int8_ptq, - false, - "Enable INT8 post-training quantization prediction"); -DEFINE_bool(enable_int8_qat, - false, - "Enable INT8 quant-aware training prediction"); -DEFINE_int32(warmup_batch_size, 100, "batch size for quantization warmup"); +PD_DEFINE_string(model_name, "", "model name"); +PD_DEFINE_string(infer_model, "", "model path"); +PD_DEFINE_string(fp32_model, "", "FP32 model path"); +PD_DEFINE_string(int8_model, "", "INT8 model path"); +PD_DEFINE_string(infer_data, "", "data file"); +PD_DEFINE_string(refer_result, "", "reference result for comparison"); +PD_DEFINE_int32(batch_size, 1, "batch size"); +PD_DEFINE_bool(ernie_large, false, "Test ernie large"); +PD_DEFINE_bool(with_accuracy_layer, + true, + "Calculate the accuracy while label is in the input"); +PD_DEFINE_bool(enable_fp32, true, "Enable FP32 type prediction"); +PD_DEFINE_bool(enable_bf16, false, "Enable BF16 type prediction"); +PD_DEFINE_bool(enable_int8_ptq, + false, + "Enable INT8 post-training quantization prediction"); +PD_DEFINE_bool(enable_int8_qat, + false, + "Enable INT8 quant-aware training prediction"); 
+PD_DEFINE_int32(warmup_batch_size, 100, "batch size for quantization warmup"); // setting iterations to 0 means processing the whole dataset -DEFINE_int32(iterations, 0, "number of batches to process"); -DEFINE_int32(repeat, 1, "Running the inference program repeat times."); -DEFINE_bool(test_all_data, false, "Test the all dataset in data file."); -DEFINE_int32(num_threads, 1, "Running the inference program in multi-threads."); -DEFINE_bool(use_analysis, - true, - "Running the inference program in analysis mode."); -DEFINE_bool(record_benchmark, - false, - "Record benchmark after profiling the model"); -DEFINE_double(accuracy, 1e-3, "Result Accuracy."); -DEFINE_double(quantized_accuracy, 1e-2, "Result Quantized Accuracy."); -DEFINE_bool(zero_copy, false, "Use ZeroCopy to speedup Feed/Fetch."); -DEFINE_bool(warmup, - false, - "Use warmup to calculate elapsed_time more accurately. " - "To reduce CI time, it sets false in default."); -DEFINE_int32(warmup_iters, 1, "Number of batches to process during warmup."); - -DEFINE_bool(enable_profile, false, "Turn on profiler for fluid"); -DEFINE_int32(cpu_num_threads, 1, "Number of threads for each paddle instance."); -DEFINE_bool(fuse_multi_gru, - false, - "Running the inference program with multi_gru_fuse_pass"); +PD_DEFINE_int32(iterations, 0, "number of batches to process"); +PD_DEFINE_int32(repeat, 1, "Running the inference program repeat times."); +PD_DEFINE_bool(test_all_data, false, "Test the whole dataset in the data file."); +PD_DEFINE_int32(num_threads, + 1, + "Running the inference program in multi-threads."); +PD_DEFINE_bool(use_analysis, + true, + "Running the inference program in analysis mode."); +PD_DEFINE_bool(record_benchmark, + false, + "Record benchmark after profiling the model"); +PD_DEFINE_double(accuracy, 1e-3, "Result Accuracy."); +PD_DEFINE_double(quantized_accuracy, 1e-2, "Result Quantized Accuracy."); +PD_DEFINE_bool(zero_copy, false, "Use ZeroCopy to speedup Feed/Fetch."); +PD_DEFINE_bool(warmup, + false, + "Use warmup to calculate elapsed_time more accurately. 
" + "To reduce CI time, it is set to false by default."); +PD_DEFINE_int32(warmup_iters, 1, "Number of batches to process during warmup."); + +PD_DEFINE_bool(enable_profile, false, "Turn on profiler for fluid"); +PD_DEFINE_int32(cpu_num_threads, + 1, + "Number of threads for each paddle instance."); +PD_DEFINE_bool(fuse_multi_gru, + false, + "Running the inference program with multi_gru_fuse_pass"); // ipu related -DEFINE_int32(ipu_micro_batch_size, 1, "micro batch size"); -DEFINE_int32(ipu_device_num, 1, "device num"); -DEFINE_bool(ipu_enable_pipelining, false, "enable pipelining"); -DEFINE_int32(ipu_batches_per_step, - 1, - "the number of batches per run in pipelining"); -DEFINE_bool(ipu_enable_fp16, false, "enable fp16"); -DEFINE_int32(ipu_replica_num, 1, "replica num"); -DEFINE_double(ipu_available_memory_proportion, - 1.0, - "available memory proportion"); -DEFINE_bool(ipu_enable_half_partial, false, "enable half partial"); +PD_DEFINE_int32(ipu_micro_batch_size, 1, "micro batch size"); +PD_DEFINE_int32(ipu_device_num, 1, "device num"); +PD_DEFINE_bool(ipu_enable_pipelining, false, "enable pipelining"); +PD_DEFINE_int32(ipu_batches_per_step, + 1, + "the number of batches per run in pipelining"); +PD_DEFINE_bool(ipu_enable_fp16, false, "enable fp16"); +PD_DEFINE_int32(ipu_replica_num, 1, "replica num"); +PD_DEFINE_double(ipu_available_memory_proportion, + 1.0, + "available memory proportion"); +PD_DEFINE_bool(ipu_enable_half_partial, false, "enable half partial"); namespace paddle { namespace inference { -using paddle::framework::proto::VarType; -using float16 = paddle::platform::float16; +using ::paddle::framework::proto::VarType; +using float16 = ::paddle::platform::float16; template <typename T> -constexpr paddle::PaddleDType GetPaddleDType(); template <> -constexpr paddle::PaddleDType GetPaddleDType<int64_t>() { - return paddle::PaddleDType::INT64; +constexpr ::paddle::PaddleDType GetPaddleDType<int64_t>() { + return ::paddle::PaddleDType::INT64; } template <> -constexpr paddle::PaddleDType GetPaddleDType<float>() { - return paddle::PaddleDType::FLOAT32; +constexpr ::paddle::PaddleDType GetPaddleDType<float>() { + return ::paddle::PaddleDType::FLOAT32; } void PrintConfig(const PaddlePredictor::Config *config, bool use_analysis) { @@ -521,7 +525,7 @@ void PredictionWarmUp(PaddlePredictor *predictor, PrintTime( batch_size, 1, num_threads, tid, batch_latency, iterations, data_type); if (FLAGS_enable_profile) { - paddle::platform::ResetProfiler(); + ::paddle::platform::ResetProfiler(); } } @@ -749,7 +753,7 @@ float CompareAccuracyOne( if (output_slots[i][compared_idx].lod.size() > 0) throw std::invalid_argument("CompareAccuracy: output has nonempty LoD."); - if (output_slots[i][compared_idx].dtype != paddle::PaddleDType::FLOAT32) + if (output_slots[i][compared_idx].dtype != ::paddle::PaddleDType::FLOAT32) throw std::invalid_argument( "CompareAccuracy: output is of a wrong type."); @@ -1156,7 +1160,7 @@ static bool CompareTensor(const phi::DenseTensor &a, return true; } -void ConvertFP32toFP16(paddle::PaddleTensor &tensor // NOLINT +void ConvertFP32toFP16(::paddle::PaddleTensor &tensor // NOLINT ) { int num = 1; for (auto dim : tensor.shape) { @@ -1177,7 +1181,7 @@ void ConvertFP32toFP16(paddle::PaddleTensor &tensor // NOLINT tensor.dtype = PaddleDType::FLOAT16; } -void ConvertFP16toFP32(paddle::PaddleTensor &tensor // NOLINT +void ConvertFP16toFP32(::paddle::PaddleTensor &tensor // NOLINT ) { int num = 1; for (auto dim : tensor.shape) { diff --git
a/test/cpp/inference/api/trt_cascade_rcnn_test.cc b/test/cpp/inference/api/trt_cascade_rcnn_test.cc index 86759c33e47..710e6481d01 100644 --- a/test/cpp/inference/api/trt_cascade_rcnn_test.cc +++ b/test/cpp/inference/api/trt_cascade_rcnn_test.cc @@ -15,7 +15,7 @@ limitations under the License. */ #include #include -#include "gflags/gflags.h" +#include "paddle/utils/flags.h" #include "test/cpp/inference/api/trt_test_helper.h" namespace paddle { diff --git a/test/cpp/inference/api/trt_dynamic_shape_ernie_fp16_serialize_deserialize_test.cc b/test/cpp/inference/api/trt_dynamic_shape_ernie_fp16_serialize_deserialize_test.cc index 806950ca8d6..34ddb8fa3c3 100644 --- a/test/cpp/inference/api/trt_dynamic_shape_ernie_fp16_serialize_deserialize_test.cc +++ b/test/cpp/inference/api/trt_dynamic_shape_ernie_fp16_serialize_deserialize_test.cc @@ -23,7 +23,7 @@ limitations under the License. */ #include #include -#include "gflags/gflags.h" +#include "paddle/utils/flags.h" #include "test/cpp/inference/api/trt_dynamic_shape_ernie_serialize_deserialize_test.h" namespace paddle { diff --git a/test/cpp/inference/api/trt_dynamic_shape_ernie_serialize_deserialize_test.cc b/test/cpp/inference/api/trt_dynamic_shape_ernie_serialize_deserialize_test.cc index bcf82d66f78..7946b7c2428 100644 --- a/test/cpp/inference/api/trt_dynamic_shape_ernie_serialize_deserialize_test.cc +++ b/test/cpp/inference/api/trt_dynamic_shape_ernie_serialize_deserialize_test.cc @@ -23,7 +23,7 @@ limitations under the License. */ #include #include -#include "gflags/gflags.h" +#include "paddle/utils/flags.h" #include "test/cpp/inference/api/trt_dynamic_shape_ernie_serialize_deserialize_test.h" namespace paddle { diff --git a/test/cpp/inference/api/trt_dynamic_shape_ernie_serialize_deserialize_test.h b/test/cpp/inference/api/trt_dynamic_shape_ernie_serialize_deserialize_test.h index 09b20d23e97..e046181dbf0 100644 --- a/test/cpp/inference/api/trt_dynamic_shape_ernie_serialize_deserialize_test.h +++ b/test/cpp/inference/api/trt_dynamic_shape_ernie_serialize_deserialize_test.h @@ -25,7 +25,7 @@ limitations under the License. */ #include #include -#include "gflags/gflags.h" +#include "paddle/utils/flags.h" #include "test/cpp/inference/api/trt_test_helper.h" namespace paddle { diff --git a/test/cpp/inference/api/trt_dynamic_shape_ernie_test.cc b/test/cpp/inference/api/trt_dynamic_shape_ernie_test.cc index 8abf7224a13..fd31613c2b6 100644 --- a/test/cpp/inference/api/trt_dynamic_shape_ernie_test.cc +++ b/test/cpp/inference/api/trt_dynamic_shape_ernie_test.cc @@ -14,7 +14,7 @@ limitations under the License. */ #include #include -#include "gflags/gflags.h" +#include "paddle/utils/flags.h" #include "paddle/fluid/inference/tensorrt/helper.h" #include "test/cpp/inference/api/trt_test_helper.h" diff --git a/test/cpp/inference/api/trt_dynamic_shape_test.cc b/test/cpp/inference/api/trt_dynamic_shape_test.cc index 505c0cdf083..8f284d75b7e 100644 --- a/test/cpp/inference/api/trt_dynamic_shape_test.cc +++ b/test/cpp/inference/api/trt_dynamic_shape_test.cc @@ -15,7 +15,7 @@ limitations under the License. 
*/ #include #include -#include "gflags/gflags.h" +#include "paddle/utils/flags.h" #include "test/cpp/inference/api/trt_test_helper.h" namespace paddle { diff --git a/test/cpp/inference/api/trt_dynamic_shape_transformer_prune_test.cc b/test/cpp/inference/api/trt_dynamic_shape_transformer_prune_test.cc index 2e71da39b5f..ff8c60df005 100644 --- a/test/cpp/inference/api/trt_dynamic_shape_transformer_prune_test.cc +++ b/test/cpp/inference/api/trt_dynamic_shape_transformer_prune_test.cc @@ -15,7 +15,7 @@ limitations under the License. */ #include #include -#include "gflags/gflags.h" +#include "paddle/utils/flags.h" #include "test/cpp/inference/api/trt_test_helper.h" namespace paddle { diff --git a/test/cpp/inference/api/trt_fc_prelu_test.cc b/test/cpp/inference/api/trt_fc_prelu_test.cc index a1ef3340759..5f10c12bf3d 100644 --- a/test/cpp/inference/api/trt_fc_prelu_test.cc +++ b/test/cpp/inference/api/trt_fc_prelu_test.cc @@ -15,7 +15,7 @@ limitations under the License. */ #include #include -#include "gflags/gflags.h" +#include "paddle/utils/flags.h" #include "test/cpp/inference/api/trt_test_helper.h" namespace paddle { diff --git a/test/cpp/inference/api/trt_instance_norm_converter_test.cc b/test/cpp/inference/api/trt_instance_norm_converter_test.cc index b58ddb2d919..fc78219a9db 100644 --- a/test/cpp/inference/api/trt_instance_norm_converter_test.cc +++ b/test/cpp/inference/api/trt_instance_norm_converter_test.cc @@ -15,7 +15,7 @@ limitations under the License. */ #include #include -#include "gflags/gflags.h" +#include "paddle/utils/flags.h" #include "test/cpp/inference/api/trt_test_helper.h" namespace paddle { diff --git a/test/cpp/inference/api/trt_mark_trt_engine_outputs_test.cc b/test/cpp/inference/api/trt_mark_trt_engine_outputs_test.cc index 9c6a87a6d16..d34d640cfaf 100644 --- a/test/cpp/inference/api/trt_mark_trt_engine_outputs_test.cc +++ b/test/cpp/inference/api/trt_mark_trt_engine_outputs_test.cc @@ -12,7 +12,6 @@ limitations under the License. */ #include #include -#include "gflags/gflags.h" #include "test/cpp/inference/api/trt_test_helper.h" namespace paddle { diff --git a/test/cpp/inference/api/trt_mobilenet_test.cc b/test/cpp/inference/api/trt_mobilenet_test.cc index 7cae99e0d34..670eaa7b116 100644 --- a/test/cpp/inference/api/trt_mobilenet_test.cc +++ b/test/cpp/inference/api/trt_mobilenet_test.cc @@ -15,7 +15,7 @@ limitations under the License. */ #include #include -#include "gflags/gflags.h" +#include "paddle/utils/flags.h" #include "test/cpp/inference/api/trt_test_helper.h" namespace paddle { diff --git a/test/cpp/inference/api/trt_quant_int8_test.cc b/test/cpp/inference/api/trt_quant_int8_test.cc index f40b2197fb2..46c1fb7c9f7 100644 --- a/test/cpp/inference/api/trt_quant_int8_test.cc +++ b/test/cpp/inference/api/trt_quant_int8_test.cc @@ -17,7 +17,7 @@ limitations under the License. */ #include -#include "gflags/gflags.h" +#include "paddle/utils/flags.h" #include "test/cpp/inference/api/trt_test_helper.h" namespace paddle { diff --git a/test/cpp/inference/api/trt_quant_int8_yolov3_r50_test.cc b/test/cpp/inference/api/trt_quant_int8_yolov3_r50_test.cc index ce058a1275c..412aeae6ed7 100644 --- a/test/cpp/inference/api/trt_quant_int8_yolov3_r50_test.cc +++ b/test/cpp/inference/api/trt_quant_int8_yolov3_r50_test.cc @@ -14,7 +14,7 @@ limitations under the License. 
*/ #include -#include "gflags/gflags.h" +#include "paddle/utils/flags.h" #include "test/cpp/inference/api/trt_test_helper.h" namespace paddle { diff --git a/test/cpp/inference/api/trt_rebind_stream_test.cc b/test/cpp/inference/api/trt_rebind_stream_test.cc index 3a42af93427..8c8e78a5190 100644 --- a/test/cpp/inference/api/trt_rebind_stream_test.cc +++ b/test/cpp/inference/api/trt_rebind_stream_test.cc @@ -16,8 +16,8 @@ limitations under the License. */ #include #include -#include "gflags/gflags.h" #include "paddle/fluid/inference/api/paddle_inference_api.h" +#include "paddle/utils/flags.h" #include "test/cpp/inference/api/tester_helper.h" namespace paddle { diff --git a/test/cpp/inference/api/trt_resnet50_test.cc b/test/cpp/inference/api/trt_resnet50_test.cc index 8dde6a0f5dd..085b64ef882 100644 --- a/test/cpp/inference/api/trt_resnet50_test.cc +++ b/test/cpp/inference/api/trt_resnet50_test.cc @@ -15,7 +15,7 @@ limitations under the License. */ #include #include -#include "gflags/gflags.h" +#include "paddle/utils/flags.h" #include "test/cpp/inference/api/trt_test_helper.h" namespace paddle { diff --git a/test/cpp/inference/api/trt_resnext_test.cc b/test/cpp/inference/api/trt_resnext_test.cc index a80058468d5..65e09d3532d 100644 --- a/test/cpp/inference/api/trt_resnext_test.cc +++ b/test/cpp/inference/api/trt_resnext_test.cc @@ -15,7 +15,7 @@ limitations under the License. */ #include #include -#include "gflags/gflags.h" +#include "paddle/utils/flags.h" #include "test/cpp/inference/api/trt_test_helper.h" namespace paddle { diff --git a/test/cpp/inference/api/trt_split_converter_test.cc b/test/cpp/inference/api/trt_split_converter_test.cc index ca41ac5681e..8d87b98f6e3 100644 --- a/test/cpp/inference/api/trt_split_converter_test.cc +++ b/test/cpp/inference/api/trt_split_converter_test.cc @@ -15,7 +15,7 @@ limitations under the License. */ #include #include -#include "gflags/gflags.h" +#include "paddle/utils/flags.h" #include "test/cpp/inference/api/trt_test_helper.h" namespace paddle { diff --git a/test/cpp/inference/api/trt_test_helper.h b/test/cpp/inference/api/trt_test_helper.h index db446e64054..dccbb589bdb 100644 --- a/test/cpp/inference/api/trt_test_helper.h +++ b/test/cpp/inference/api/trt_test_helper.h @@ -17,17 +17,17 @@ limitations under the License. */ #include #include -#include "gflags/gflags.h" #include "glog/logging.h" #include "gtest/gtest.h" +#include "paddle/utils/flags.h" #include "test/cpp/inference/api/tester_helper.h" namespace paddle { namespace inference { -DEFINE_bool(use_tensorrt, true, "Test the performance of TensorRT engine."); -DEFINE_string(prog_filename, "", "Name of model file."); -DEFINE_string(param_filename, "", "Name of parameters file."); +PD_DEFINE_bool(use_tensorrt, true, "Test the performance of TensorRT engine."); +PD_DEFINE_string(prog_filename, "", "Name of model file."); +PD_DEFINE_string(param_filename, "", "Name of parameters file."); template void SetConfig(ConfigType* config, diff --git a/test/cpp/inference/api/xpu_config_resnet50_test.cc b/test/cpp/inference/api/xpu_config_resnet50_test.cc index ce3796e4209..d118eef9e88 100644 --- a/test/cpp/inference/api/xpu_config_resnet50_test.cc +++ b/test/cpp/inference/api/xpu_config_resnet50_test.cc @@ -15,7 +15,7 @@ limitations under the License. 
*/ #include #include #include -#include "gflags/gflags.h" +#include "paddle/utils/flags.h" #include "test/cpp/inference/api/tester_helper.h" namespace paddle_infer { diff --git a/test/cpp/inference/api/xpu_runtime_config_resnet50_test.cc b/test/cpp/inference/api/xpu_runtime_config_resnet50_test.cc index b9ab6ea68d7..94de193c895 100644 --- a/test/cpp/inference/api/xpu_runtime_config_resnet50_test.cc +++ b/test/cpp/inference/api/xpu_runtime_config_resnet50_test.cc @@ -15,7 +15,7 @@ limitations under the License. */ #include #include #include -#include "gflags/gflags.h" +#include "paddle/utils/flags.h" #include "test/cpp/inference/api/tester_helper.h" #include "xpu/runtime.h" #include "xpu/xdnn.h" diff --git a/test/cpp/inference/infer_ut/test_LeViT.cc b/test/cpp/inference/infer_ut/test_LeViT.cc index 056371b0ae6..ed30f04e730 100644 --- a/test/cpp/inference/infer_ut/test_LeViT.cc +++ b/test/cpp/inference/infer_ut/test_LeViT.cc @@ -248,6 +248,6 @@ TEST(tensorrt_tester_LeViT, multi_stream_thread4_trt_fp32_bz2) { int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); - ::GFLAGS_NAMESPACE::ParseCommandLineFlags(&argc, &argv, true); + gflags::ParseCommandLineFlags(&argc, &argv, true); return RUN_ALL_TESTS(); } diff --git a/test/cpp/inference/infer_ut/test_det_mv3_db.cc b/test/cpp/inference/infer_ut/test_det_mv3_db.cc index a407251ccba..474d0701225 100644 --- a/test/cpp/inference/infer_ut/test_det_mv3_db.cc +++ b/test/cpp/inference/infer_ut/test_det_mv3_db.cc @@ -183,6 +183,6 @@ TEST(mkldnn_tester_det_mv3_db, multi_thread2_mkl_fp32_bz2) { int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); - ::GFLAGS_NAMESPACE::ParseCommandLineFlags(&argc, &argv, true); + gflags::ParseCommandLineFlags(&argc, &argv, true); return RUN_ALL_TESTS(); } diff --git a/test/cpp/inference/infer_ut/test_ernie_text_cls.cc b/test/cpp/inference/infer_ut/test_ernie_text_cls.cc index 2ffeb604230..ddf2dbc49d8 100644 --- a/test/cpp/inference/infer_ut/test_ernie_text_cls.cc +++ b/test/cpp/inference/infer_ut/test_ernie_text_cls.cc @@ -134,6 +134,6 @@ TEST(mkldnn_tester_ernie_text_cls, multi_thread4_mkl_fp32_bz2) { int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); - ::GFLAGS_NAMESPACE::ParseCommandLineFlags(&argc, &argv, true); + gflags::ParseCommandLineFlags(&argc, &argv, true); return RUN_ALL_TESTS(); } diff --git a/test/cpp/inference/infer_ut/test_ernie_xnli_int8.cc b/test/cpp/inference/infer_ut/test_ernie_xnli_int8.cc index e3030d08021..4e9c96c530a 100644 --- a/test/cpp/inference/infer_ut/test_ernie_xnli_int8.cc +++ b/test/cpp/inference/infer_ut/test_ernie_xnli_int8.cc @@ -192,7 +192,7 @@ TEST(tensorrt_tester_ernie_xnli, oss_varlen_truth_data_int8) { int main(int argc, char **argv) { ::testing::InitGoogleTest(&argc, argv); - ::GFLAGS_NAMESPACE::ParseCommandLineFlags(&argc, &argv, true); + gflags::ParseCommandLineFlags(&argc, &argv, true); #if IS_TRT_VERSION_GE(7200) return RUN_ALL_TESTS(); diff --git a/test/cpp/inference/infer_ut/test_mobilnetv1.cc b/test/cpp/inference/infer_ut/test_mobilnetv1.cc index 582c34e1b0b..2660cc5cbd5 100644 --- a/test/cpp/inference/infer_ut/test_mobilnetv1.cc +++ b/test/cpp/inference/infer_ut/test_mobilnetv1.cc @@ -81,6 +81,6 @@ TEST(tensorrt_tester_mobilenetv1, tuned_dynamic_trt_fp32_bz2) { int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); - ::GFLAGS_NAMESPACE::ParseCommandLineFlags(&argc, &argv, true); + gflags::ParseCommandLineFlags(&argc, &argv, true); return RUN_ALL_TESTS(); } diff --git 
a/test/cpp/inference/infer_ut/test_ppyolo_mbv3.cc b/test/cpp/inference/infer_ut/test_ppyolo_mbv3.cc index a075192a580..407e7c87dc9 100644 --- a/test/cpp/inference/infer_ut/test_ppyolo_mbv3.cc +++ b/test/cpp/inference/infer_ut/test_ppyolo_mbv3.cc @@ -155,6 +155,6 @@ TEST(DISABLED_mkldnn_tester_ppyolo_mbv3, multi_thread4_mkl_bz2) { int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); - ::GFLAGS_NAMESPACE::ParseCommandLineFlags(&argc, &argv, true); + gflags::ParseCommandLineFlags(&argc, &argv, true); return RUN_ALL_TESTS(); } diff --git a/test/cpp/inference/infer_ut/test_ppyolov2_r50vd.cc b/test/cpp/inference/infer_ut/test_ppyolov2_r50vd.cc index ad91cb5bc9e..c90256fb8b8 100644 --- a/test/cpp/inference/infer_ut/test_ppyolov2_r50vd.cc +++ b/test/cpp/inference/infer_ut/test_ppyolov2_r50vd.cc @@ -154,6 +154,6 @@ TEST(mkldnn_tester_ppyolov2_r50vd, multi_thread2_mkl_bz2) { int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); - ::GFLAGS_NAMESPACE::ParseCommandLineFlags(&argc, &argv, true); + gflags::ParseCommandLineFlags(&argc, &argv, true); return RUN_ALL_TESTS(); } diff --git a/test/cpp/inference/infer_ut/test_resnet50.cc b/test/cpp/inference/infer_ut/test_resnet50.cc index 1c9b7056492..50b0b71c2e1 100644 --- a/test/cpp/inference/infer_ut/test_resnet50.cc +++ b/test/cpp/inference/infer_ut/test_resnet50.cc @@ -242,6 +242,6 @@ TEST(DISABLED_tensorrt_tester_resnet50, profile_multi_thread_trt_fp32) { int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); - ::GFLAGS_NAMESPACE::ParseCommandLineFlags(&argc, &argv, true); + gflags::ParseCommandLineFlags(&argc, &argv, true); return RUN_ALL_TESTS(); } diff --git a/test/cpp/inference/infer_ut/test_resnet50_quant.cc b/test/cpp/inference/infer_ut/test_resnet50_quant.cc index 452cf31b311..c2c44102f56 100644 --- a/test/cpp/inference/infer_ut/test_resnet50_quant.cc +++ b/test/cpp/inference/infer_ut/test_resnet50_quant.cc @@ -171,6 +171,6 @@ TEST(DISABLED_tensorrt_tester_resnet50_quant, multi_thread_multi_instance) { int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); - ::GFLAGS_NAMESPACE::ParseCommandLineFlags(&argc, &argv, true); + gflags::ParseCommandLineFlags(&argc, &argv, true); return RUN_ALL_TESTS(); } diff --git a/test/cpp/inference/infer_ut/test_yolov3.cc b/test/cpp/inference/infer_ut/test_yolov3.cc index 4dc35af4959..4a3e65ba5a5 100644 --- a/test/cpp/inference/infer_ut/test_yolov3.cc +++ b/test/cpp/inference/infer_ut/test_yolov3.cc @@ -154,6 +154,6 @@ TEST(test_yolov3, multi_thread4_mkl_bz2) { int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); - ::GFLAGS_NAMESPACE::ParseCommandLineFlags(&argc, &argv, true); + gflags::ParseCommandLineFlags(&argc, &argv, true); return RUN_ALL_TESTS(); } diff --git a/test/cpp/phi/api/scale_api.h b/test/cpp/phi/api/scale_api.h index 571ab0defbc..104034d5eff 100644 --- a/test/cpp/phi/api/scale_api.h +++ b/test/cpp/phi/api/scale_api.h @@ -14,7 +14,6 @@ #pragma once -#include "gflags/gflags.h" #include "glog/logging.h" #include "paddle/phi/api/include/tensor.h" #include "paddle/phi/api/lib/kernel_dispatch.h" @@ -26,6 +25,7 @@ #include "paddle/phi/core/meta_tensor.h" #include "paddle/phi/infermeta/unary.h" #include "paddle/phi/kernels/scale_kernel.h" +#include "paddle/utils/flags.h" PHI_DECLARE_int32(low_precision_op_list); namespace paddle { diff --git a/test/cpp/prim/test_static_prim.cc b/test/cpp/prim/test_static_prim.cc index e26f54a44be..d4f5dcb8998 100644 --- a/test/cpp/prim/test_static_prim.cc +++ 
b/test/cpp/prim/test_static_prim.cc @@ -28,7 +28,7 @@ #include "paddle/phi/core/flags.h" #include "paddle/phi/core/kernel_registry.h" -DECLARE_bool(prim_enabled); +PD_DECLARE_bool(prim_enabled); PHI_DECLARE_string(tensor_operants_mode); PD_DECLARE_KERNEL(full, CPU, ALL_LAYOUT); -- GitLab
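For reference, a minimal sketch of the flag pattern these tests migrate to, assuming only the paddle/utils/flags.h API exercised in the hunks above (PD_DEFINE_*, PD_DECLARE_*, and paddle::flags::ParseCommandLineFlags); the flag names below are illustrative and not taken from any single file:

#include <string>

#include "paddle/utils/flags.h"

// PD_DEFINE_* replaces the gflags DEFINE_* macros and, as with gflags,
// generates a FLAGS_<name> global for each flag. (A flag defined in another
// translation unit is declared with the PD_DECLARE_* counterpart instead,
// as in PD_DECLARE_bool(prim_enabled) in the last hunk.)
PD_DEFINE_string(model_dir, "", "Directory of the inference model.");
PD_DEFINE_int32(batch_size, 1, "batch size");

int main(int argc, char* argv[]) {
  // Unlike google::ParseCommandLineFlags(&argc, &argv, true), the paddle
  // parser is called without the third remove_flags argument.
  paddle::flags::ParseCommandLineFlags(&argc, &argv);

  // Flags are read through the generated FLAGS_<name> globals, just as the
  // tests above read FLAGS_infer_model or FLAGS_switch_addr_inner.
  const std::string& dir = FLAGS_model_dir;
  if (!dir.empty() && FLAGS_batch_size > 0) {
    // e.g. load the model from dir and run FLAGS_batch_size samples
  }
  return 0;
}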