提交 ca0136a6 编写于 作者: S sneaxiy

make flag setter easier

上级 e93c18a3
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
#include "paddle/fluid/framework/details/variable_visitor.h" #include "paddle/fluid/framework/details/variable_visitor.h"
#include "paddle/fluid/platform/profiler.h" #include "paddle/fluid/platform/profiler.h"
DEFINE_bool( PADDLE_DEFINE_EXPORTED_bool(
cpu_deterministic, false, cpu_deterministic, false,
"Whether to make the result of computation deterministic in CPU side."); "Whether to make the result of computation deterministic in CPU side.");
......
...@@ -25,13 +25,14 @@ class VarDesc; ...@@ -25,13 +25,14 @@ class VarDesc;
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
DEFINE_double(fuse_parameter_memory_size, -1.0, // MBytes PADDLE_DEFINE_EXPORTED_double(
"fuse_parameter_memory_size is up limited memory size(MB)" fuse_parameter_memory_size, -1.0, // MBytes
"of one group parameters' gradient which is the input " "fuse_parameter_memory_size is up limited memory size(MB)"
"of communication calling(e.g NCCLAllReduce). " "of one group parameters' gradient which is the input "
"The default value is 0, it means that " "of communication calling(e.g NCCLAllReduce). "
"not set group according to memory_size."); "The default value is 0, it means that "
DEFINE_int32( "not set group according to memory_size.");
PADDLE_DEFINE_EXPORTED_int32(
fuse_parameter_groups_size, 1, fuse_parameter_groups_size, 1,
"fuse_parameter_groups_size is the up limited size of one group " "fuse_parameter_groups_size is the up limited size of one group "
"parameters' gradient. " "parameters' gradient. "
......
...@@ -17,8 +17,8 @@ limitations under the License. */ ...@@ -17,8 +17,8 @@ limitations under the License. */
#include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/operator.h"
DEFINE_bool(convert_all_blocks, true, PADDLE_DEFINE_EXPORTED_bool(convert_all_blocks, true,
"Convert all blocks in program into SSAgraphs"); "Convert all blocks in program into SSAgraphs");
namespace paddle { namespace paddle {
namespace framework { namespace framework {
......
...@@ -18,9 +18,9 @@ limitations under the License. */ ...@@ -18,9 +18,9 @@ limitations under the License. */
#include "paddle/fluid/framework/op_proto_maker.h" #include "paddle/fluid/framework/op_proto_maker.h"
DECLARE_bool(convert_all_blocks); DECLARE_bool(convert_all_blocks);
DEFINE_string(print_sub_graph_dir, "", PADDLE_DEFINE_EXPORTED_string(print_sub_graph_dir, "",
"FLAGS_print_sub_graph_dir is used " "FLAGS_print_sub_graph_dir is used "
"to print the nodes of sub_graphs."); "to print the nodes of sub_graphs.");
namespace paddle { namespace paddle {
namespace framework { namespace framework {
......
...@@ -47,7 +47,8 @@ class LoDTensor; ...@@ -47,7 +47,8 @@ class LoDTensor;
DECLARE_bool(benchmark); DECLARE_bool(benchmark);
DECLARE_bool(check_nan_inf); DECLARE_bool(check_nan_inf);
DECLARE_bool(enable_unused_var_check); DECLARE_bool(enable_unused_var_check);
DEFINE_int32(inner_op_parallelism, 0, "number of threads for inner op"); PADDLE_DEFINE_EXPORTED_int32(inner_op_parallelism, 0,
"number of threads for inner op");
namespace paddle { namespace paddle {
namespace framework { namespace framework {
......
...@@ -46,11 +46,13 @@ DECLARE_double(eager_delete_tensor_gb); ...@@ -46,11 +46,13 @@ DECLARE_double(eager_delete_tensor_gb);
#ifdef WITH_GPERFTOOLS #ifdef WITH_GPERFTOOLS
#include "gperftools/profiler.h" #include "gperftools/profiler.h"
#endif #endif
DEFINE_string(pe_profile_fname, "", PADDLE_DEFINE_EXPORTED_string(
"Profiler filename for PE, which generated by gperftools." pe_profile_fname, "",
"Only valid when compiled `WITH_PRIFILER=ON`. Empty if disable."); "Profiler filename for PE, which generated by gperftools."
DEFINE_bool(enable_parallel_graph, false, "Only valid when compiled `WITH_PRIFILER=ON`. Empty if disable.");
"Force disable parallel graph execution mode if set false."); PADDLE_DEFINE_EXPORTED_bool(
enable_parallel_graph, false,
"Force disable parallel graph execution mode if set false.");
namespace paddle { namespace paddle {
namespace framework { namespace framework {
......
...@@ -19,7 +19,7 @@ limitations under the License. */ ...@@ -19,7 +19,7 @@ limitations under the License. */
DECLARE_bool(benchmark); DECLARE_bool(benchmark);
DEFINE_bool( PADDLE_DEFINE_EXPORTED_bool(
eager_delete_scope, true, eager_delete_scope, true,
"Delete local scope eagerly. It will reduce GPU memory usage but " "Delete local scope eagerly. It will reduce GPU memory usage but "
"slow down the destruction of variables.(around 1% performance harm)"); "slow down the destruction of variables.(around 1% performance harm)");
......
...@@ -17,15 +17,16 @@ limitations under the License. */ ...@@ -17,15 +17,16 @@ limitations under the License. */
#include <glog/logging.h> #include <glog/logging.h>
#include <string> #include <string>
#include "gflags/gflags.h"
#include "paddle/fluid/framework/no_need_buffer_vars_inference.h" #include "paddle/fluid/framework/no_need_buffer_vars_inference.h"
#include "paddle/fluid/framework/op_info.h" #include "paddle/fluid/framework/op_info.h"
#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/flags.h"
DEFINE_bool(enable_unused_var_check, false, PADDLE_DEFINE_EXPORTED_bool(
"Checking whether operator contains unused inputs, " enable_unused_var_check, false,
"especially for grad operator. It should be in unittest."); "Checking whether operator contains unused inputs, "
"especially for grad operator. It should be in unittest.");
namespace paddle { namespace paddle {
namespace framework { namespace framework {
......
...@@ -11,7 +11,7 @@ cc_library(amp SRCS amp_auto_cast.cc DEPS layer ) ...@@ -11,7 +11,7 @@ cc_library(amp SRCS amp_auto_cast.cc DEPS layer )
cc_library(tracer SRCS tracer.cc DEPS layer engine program_desc_tracer amp denormal) cc_library(tracer SRCS tracer.cc DEPS layer engine program_desc_tracer amp denormal)
cc_library(basic_engine SRCS basic_engine.cc DEPS layer gradient_accumulator) cc_library(basic_engine SRCS basic_engine.cc DEPS layer gradient_accumulator)
cc_library(engine SRCS basic_engine.cc partial_grad_engine.cc DEPS layer gradient_accumulator) cc_library(engine SRCS basic_engine.cc partial_grad_engine.cc DEPS layer gradient_accumulator)
cc_library(imperative_profiler SRCS profiler.cc) cc_library(imperative_profiler SRCS profiler.cc DEPS flags)
if(NOT WIN32) if(NOT WIN32)
if(WITH_NCCL OR WITH_RCCL) if(WITH_NCCL OR WITH_RCCL)
cc_library(imperative_all_reduce SRCS all_reduce.cc DEPS collective_helper device_context selected_rows tensor) cc_library(imperative_all_reduce SRCS all_reduce.cc DEPS collective_helper device_context selected_rows tensor)
......
...@@ -13,11 +13,11 @@ ...@@ -13,11 +13,11 @@
// limitations under the License. // limitations under the License.
#include "paddle/fluid/imperative/flags.h" #include "paddle/fluid/imperative/flags.h"
#include "gflags/gflags.h" #include "paddle/fluid/platform/flags.h"
DEFINE_uint64(dygraph_debug, 0, PADDLE_DEFINE_EXPORTED_uint64(dygraph_debug, 0,
"Debug level of dygraph. This flag is not " "Debug level of dygraph. This flag is not "
"open to users"); "open to users");
namespace paddle { namespace paddle {
namespace imperative { namespace imperative {
......
...@@ -19,9 +19,9 @@ ...@@ -19,9 +19,9 @@
#endif #endif
#include <glog/logging.h> #include <glog/logging.h>
#include <mutex> // NOLINT #include <mutex> // NOLINT
#include "gflags/gflags.h" #include "paddle/fluid/platform/flags.h"
DEFINE_string( PADDLE_DEFINE_EXPORTED_string(
tracer_profile_fname, "xxgperf", tracer_profile_fname, "xxgperf",
"Profiler filename for imperative tracer, which generated by gperftools." "Profiler filename for imperative tracer, which generated by gperftools."
"Only valid when compiled `WITH_PROFILER=ON`. Empty if disable."); "Only valid when compiled `WITH_PROFILER=ON`. Empty if disable.");
......
...@@ -99,7 +99,7 @@ cc_test(allocator_facade_abs_flags_test SRCS allocator_facade_abs_flags_test.cc ...@@ -99,7 +99,7 @@ cc_test(allocator_facade_abs_flags_test SRCS allocator_facade_abs_flags_test.cc
cc_test(allocator_facade_frac_flags_test SRCS allocator_facade_frac_flags_test.cc DEPS allocator_facade) cc_test(allocator_facade_frac_flags_test SRCS allocator_facade_frac_flags_test.cc DEPS allocator_facade)
cc_library(auto_growth_best_fit_allocator SRCS auto_growth_best_fit_allocator.cc DEPS allocator aligned_allocator) cc_library(auto_growth_best_fit_allocator SRCS auto_growth_best_fit_allocator.cc DEPS allocator aligned_allocator flags)
cc_test(auto_growth_best_fit_allocator_facade_test SRCS auto_growth_best_fit_allocator_facade_test.cc DEPS cpu_allocator auto_growth_best_fit_allocator) cc_test(auto_growth_best_fit_allocator_facade_test SRCS auto_growth_best_fit_allocator_facade_test.cc DEPS cpu_allocator auto_growth_best_fit_allocator)
cc_test(auto_growth_best_fit_allocator_test SRCS auto_growth_best_fit_allocator_test.cc DEPS auto_growth_best_fit_allocator) cc_test(auto_growth_best_fit_allocator_test SRCS auto_growth_best_fit_allocator_test.cc DEPS auto_growth_best_fit_allocator)
......
...@@ -37,14 +37,15 @@ ...@@ -37,14 +37,15 @@
#endif #endif
#include "paddle/fluid/platform/npu_info.h" #include "paddle/fluid/platform/npu_info.h"
DEFINE_int64( PADDLE_DEFINE_EXPORTED_int64(
gpu_allocator_retry_time, 10000, gpu_allocator_retry_time, 10000,
"The retry time (milliseconds) when allocator fails " "The retry time (milliseconds) when allocator fails "
"to allocate memory. No retry if this value is not greater than 0"); "to allocate memory. No retry if this value is not greater than 0");
DEFINE_bool(use_system_allocator, false, PADDLE_DEFINE_EXPORTED_bool(
"Whether to use system allocator to allocate CPU and GPU memory. " use_system_allocator, false,
"Only used for unittests."); "Whether to use system allocator to allocate CPU and GPU memory. "
"Only used for unittests.");
namespace paddle { namespace paddle {
namespace memory { namespace memory {
......
...@@ -17,18 +17,21 @@ ...@@ -17,18 +17,21 @@
#include <algorithm> #include <algorithm>
#include <mutex> // NOLINT #include <mutex> // NOLINT
#include "paddle/fluid/memory/allocation/aligned_allocator.h" #include "paddle/fluid/memory/allocation/aligned_allocator.h"
#include "paddle/fluid/platform/flags.h"
DEFINE_bool(free_idle_chunk, false,
"Whether to free idle chunk when each allocation is freed. " PADDLE_DEFINE_READONLY_EXPORTED_bool(
"If false, all freed allocation would be cached to speed up next " free_idle_chunk, false,
"allocation request. If true, no allocation would be cached. This " "Whether to free idle chunk when each allocation is freed. "
"flag only works when FLAGS_allocator_strategy=auto_growth."); "If false, all freed allocation would be cached to speed up next "
"allocation request. If true, no allocation would be cached. This "
DEFINE_bool(free_when_no_cache_hit, false, "flag only works when FLAGS_allocator_strategy=auto_growth.");
"Whether to free idle chunks when no cache hit. If true, idle "
"chunk would be freed when no cache hit; if false, idle " PADDLE_DEFINE_READONLY_EXPORTED_bool(
"chunk would be freed when out of memory occurs. This flag " free_when_no_cache_hit, false,
"only works when FLAGS_allocator_strategy=auto_growth."); "Whether to free idle chunks when no cache hit. If true, idle "
"chunk would be freed when no cache hit; if false, idle "
"chunk would be freed when out of memory occurs. This flag "
"only works when FLAGS_allocator_strategy=auto_growth.");
namespace paddle { namespace paddle {
namespace memory { namespace memory {
......
...@@ -34,12 +34,13 @@ ...@@ -34,12 +34,13 @@
#include "paddle/fluid/platform/xpu/xpu_header.h" #include "paddle/fluid/platform/xpu/xpu_header.h"
#endif #endif
DEFINE_bool(init_allocated_mem, false, PADDLE_DEFINE_EXPORTED_bool(
"It is a mistake that the values of the memory allocated by " init_allocated_mem, false,
"BuddyAllocator are always zeroed in some op's implementation. " "It is a mistake that the values of the memory allocated by "
"To find this error in time, we use init_allocated_mem to indicate " "BuddyAllocator are always zeroed in some op's implementation. "
"that initializing the allocated memory with a small value " "To find this error in time, we use init_allocated_mem to indicate "
"during unit testing."); "that initializing the allocated memory with a small value "
"during unit testing.");
DECLARE_double(fraction_of_gpu_memory_to_use); DECLARE_double(fraction_of_gpu_memory_to_use);
DECLARE_uint64(initial_gpu_memory_in_mb); DECLARE_uint64(initial_gpu_memory_in_mb);
DECLARE_uint64(reallocate_gpu_memory_in_mb); DECLARE_uint64(reallocate_gpu_memory_in_mb);
......
...@@ -15,7 +15,8 @@ limitations under the License. */ ...@@ -15,7 +15,8 @@ limitations under the License. */
#include "paddle/fluid/operators/pscore/heter_listen_and_serv_op.h" #include "paddle/fluid/operators/pscore/heter_listen_and_serv_op.h"
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
DEFINE_int32(rpc_send_thread_num, 12, "number of threads for rpc send"); PADDLE_DEFINE_EXPORTED_int32(rpc_send_thread_num, 12,
"number of threads for rpc send");
namespace paddle { namespace paddle {
namespace operators { namespace operators {
......
...@@ -37,13 +37,13 @@ if (WITH_PYTHON) ...@@ -37,13 +37,13 @@ if (WITH_PYTHON)
endif(NOT WIN32) endif(NOT WIN32)
endif() endif()
cc_library(flags SRCS flags.cc DEPS gflags) cc_library(flags SRCS flags.cc DEPS gflags boost)
cc_library(denormal SRCS denormal.cc DEPS) cc_library(denormal SRCS denormal.cc DEPS)
cc_library(errors SRCS errors.cc DEPS error_codes_proto) cc_library(errors SRCS errors.cc DEPS error_codes_proto)
cc_test(errors_test SRCS errors_test.cc DEPS errors enforce) cc_test(errors_test SRCS errors_test.cc DEPS errors enforce)
set(enforce_deps flags errors boost) set(enforce_deps flags errors boost flags)
if(WITH_GPU) if(WITH_GPU)
set(enforce_deps ${enforce_deps} external_error_proto) set(enforce_deps ${enforce_deps} external_error_proto)
endif() endif()
......
...@@ -31,7 +31,7 @@ limitations under the License. */ ...@@ -31,7 +31,7 @@ limitations under the License. */
#endif // _WIN32 #endif // _WIN32
#include <algorithm> #include <algorithm>
#include "gflags/gflags.h" #include "paddle/fluid/platform/flags.h"
DECLARE_double(fraction_of_cpu_memory_to_use); DECLARE_double(fraction_of_cpu_memory_to_use);
DECLARE_uint64(initial_cpu_memory_in_mb); DECLARE_uint64(initial_cpu_memory_in_mb);
...@@ -42,7 +42,8 @@ DECLARE_double(fraction_of_cuda_pinned_memory_to_use); ...@@ -42,7 +42,8 @@ DECLARE_double(fraction_of_cuda_pinned_memory_to_use);
// between host and device. Allocates too much would reduce the amount // between host and device. Allocates too much would reduce the amount
// of memory available to the system for paging. So, by default, we // of memory available to the system for paging. So, by default, we
// should set false to use_pinned_memory. // should set false to use_pinned_memory.
DEFINE_bool(use_pinned_memory, true, "If set, allocate cpu pinned memory."); PADDLE_DEFINE_EXPORTED_bool(use_pinned_memory, true,
"If set, allocate cpu pinned memory.");
namespace paddle { namespace paddle {
namespace platform { namespace platform {
...@@ -54,7 +55,9 @@ size_t CpuTotalPhysicalMemory() { ...@@ -54,7 +55,9 @@ size_t CpuTotalPhysicalMemory() {
mib[1] = HW_MEMSIZE; mib[1] = HW_MEMSIZE;
int64_t size = 0; int64_t size = 0;
size_t len = sizeof(size); size_t len = sizeof(size);
if (sysctl(mib, 2, &size, &len, NULL, 0) == 0) return (size_t)size; if (sysctl(mib, 2, &size, &len, NULL, 0) == 0) {
return static_cast<size_t>(size);
}
return 0L; return 0L;
#elif defined(_WIN32) #elif defined(_WIN32)
MEMORYSTATUSEX sMeminfo; MEMORYSTATUSEX sMeminfo;
......
...@@ -101,6 +101,7 @@ limitations under the License. */ ...@@ -101,6 +101,7 @@ limitations under the License. */
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#include "paddle/fluid/platform/type_defs.h" #include "paddle/fluid/platform/type_defs.h"
#endif #endif
#include "paddle/fluid/platform/flags.h"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
......
此差异已折叠。
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <cstdint>
#include <map>
#include <string>
#include <type_traits>
#include <typeindex>
#include "boost/variant.hpp"
#include "gflags/gflags.h"
#include "paddle/fluid/platform/macros.h"
namespace paddle {
namespace platform {
// Metadata describing one exported gflag. An entry is recorded during static
// initialization by __PADDLE_DEFINE_EXPORTED_FLAG (below) so that the flag
// can be discovered and read/written at runtime through
// GetExportedFlagInfoMap() (e.g. by the pybind getter/setter registry).
struct FlagInfo {
  // An exported flag's value is one of the gflags-supported types below.
  using ValueType =
      boost::variant<bool, int32_t, int64_t, uint64_t, double, std::string>;
  std::string name;         // flag name without the "FLAGS_" prefix
  void *value_ptr;          // pointer to the live FLAGS_<name> variable
  ValueType default_value;  // default supplied at the definition site
  std::string doc;          // help string supplied at the definition site
  bool is_writable;         // false for PADDLE_DEFINE_READONLY_* flags
};

// Maps flag name -> registration info for every exported flag.
using ExportedFlagInfoMap = std::map<std::string, FlagInfo>;
// Returns the global registry of exported flags, populated as each
// PADDLE_DEFINE_EXPORTED_* definition's static registrar runs.
const ExportedFlagInfoMap &GetExportedFlagInfoMap();
// Defines a gflag FLAGS_<__name> of gflags type __gflag_type and, via a
// file-local static registrar object, records it in GetExportedFlagInfoMap()
// during static initialization. __is_writable is stored verbatim in
// FlagInfo::is_writable; the map entry always keeps a non-const pointer to
// the flag variable. The static_assert compares the registrar type against
// its ::-qualified spelling so the macro can only be used at global
// namespace scope (where the DEFINE_* it expands to must live anyway).
// NOTE: no comments inside the macro body — '\' line splicing happens
// before comment removal, so a '//' line would swallow its continuation.
#define __PADDLE_DEFINE_EXPORTED_FLAG(__name, __is_writable, __cpp_type,   \
                                      __gflag_type, __default_value, __doc) \
  DEFINE_##__gflag_type(__name, __default_value, __doc);                   \
  struct __PaddleRegisterFlag_##__name {                                   \
    __PaddleRegisterFlag_##__name() {                                      \
      const auto &instance = ::paddle::platform::GetExportedFlagInfoMap(); \
      using Type = ::paddle::platform::ExportedFlagInfoMap;                \
      auto &info = const_cast<Type &>(instance)[#__name];                  \
      info.name = #__name;                                                 \
      info.value_ptr = &(FLAGS_##__name);                                  \
      info.default_value = static_cast<__cpp_type>(__default_value);       \
      info.doc = __doc;                                                    \
      info.is_writable = __is_writable;                                    \
    }                                                                      \
  };                                                                       \
  static_assert(std::is_same<__PaddleRegisterFlag_##__name,                \
                             ::__PaddleRegisterFlag_##__name>::value,      \
                "FLAGS should define in global namespace");                \
  static __PaddleRegisterFlag_##__name __PaddleRegisterFlag_instance##__name
// Public per-type wrappers around __PADDLE_DEFINE_EXPORTED_FLAG.
// Each defines FLAGS_<name> and registers it as runtime-discoverable.
// The plain variants register the flag as writable; the READONLY bool
// variant registers it with is_writable == false.
#define PADDLE_DEFINE_EXPORTED_bool(name, default_value, doc) \
  __PADDLE_DEFINE_EXPORTED_FLAG(name, true, bool, bool, default_value, doc)
#define PADDLE_DEFINE_READONLY_EXPORTED_bool(name, default_value, doc) \
  __PADDLE_DEFINE_EXPORTED_FLAG(name, false, bool, bool, default_value, doc)
#define PADDLE_DEFINE_EXPORTED_int32(name, default_value, doc) \
  __PADDLE_DEFINE_EXPORTED_FLAG(name, true, int32_t, int32, default_value, doc)
#define PADDLE_DEFINE_EXPORTED_int64(name, default_value, doc) \
  __PADDLE_DEFINE_EXPORTED_FLAG(name, true, int64_t, int64, default_value, doc)
#define PADDLE_DEFINE_EXPORTED_uint64(name, default_value, doc) \
  __PADDLE_DEFINE_EXPORTED_FLAG(name, true, uint64_t, uint64, default_value, \
                                doc)
#define PADDLE_DEFINE_EXPORTED_double(name, default_value, doc) \
  __PADDLE_DEFINE_EXPORTED_FLAG(name, true, double, double, default_value, doc)
#define PADDLE_DEFINE_EXPORTED_string(name, default_value, doc) \
  __PADDLE_DEFINE_EXPORTED_FLAG(name, true, ::std::string, string, \
                                default_value, doc)
} // namespace platform
} // namespace paddle
...@@ -43,9 +43,10 @@ limitations under the License. */ ...@@ -43,9 +43,10 @@ limitations under the License. */
#endif #endif
DECLARE_int32(paddle_num_threads); DECLARE_int32(paddle_num_threads);
DEFINE_int32(multiple_of_cupti_buffer_size, 1, PADDLE_DEFINE_EXPORTED_int32(
"Multiple of the CUPTI device buffer size. If the timestamps have " multiple_of_cupti_buffer_size, 1,
"been dropped when you are profiling, try increasing this value."); "Multiple of the CUPTI device buffer size. If the timestamps have "
"been dropped when you are profiling, try increasing this value.");
namespace paddle { namespace paddle {
namespace platform { namespace platform {
......
...@@ -14,11 +14,12 @@ limitations under the License. */ ...@@ -14,11 +14,12 @@ limitations under the License. */
#include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/place.h"
DEFINE_bool(benchmark, false, PADDLE_DEFINE_EXPORTED_bool(
"Doing memory benchmark. It will make deleting scope synchronized, " benchmark, false,
"and add some memory usage logs." "Doing memory benchmark. It will make deleting scope synchronized, "
"Default cuda is asynchronous device, set to True will" "and add some memory usage logs."
"force op run in synchronous mode."); "Default cuda is asynchronous device, set to True will"
"force op run in synchronous mode.");
namespace paddle { namespace paddle {
namespace platform { namespace platform {
......
...@@ -24,7 +24,8 @@ limitations under the License. */ ...@@ -24,7 +24,8 @@ limitations under the License. */
#include "paddle/fluid/platform/dynload/nvtx.h" #include "paddle/fluid/platform/dynload/nvtx.h"
#endif #endif
DEFINE_bool(enable_rpc_profiler, false, "Enable rpc profiler or not."); PADDLE_DEFINE_EXPORTED_bool(enable_rpc_profiler, false,
"Enable rpc profiler or not.");
namespace paddle { namespace paddle {
namespace platform { namespace platform {
......
...@@ -18,14 +18,15 @@ limitations under the License. */ ...@@ -18,14 +18,15 @@ limitations under the License. */
#include "paddle/fluid/platform/xpu/xpu_header.h" #include "paddle/fluid/platform/xpu/xpu_header.h"
#include "paddle/fluid/string/split.h" #include "paddle/fluid/string/split.h"
DEFINE_string(selected_xpus, "", PADDLE_DEFINE_EXPORTED_string(
"A list of device ids separated by comma, like: 0,1,2,3. " selected_xpus, "",
"This option is useful when doing multi process training and " "A list of device ids separated by comma, like: 0,1,2,3. "
"each process have only one device (XPU). If you want to use " "This option is useful when doing multi process training and "
"all visible devices, set this to empty string. NOTE: the " "each process have only one device (XPU). If you want to use "
"reason of doing this is that we want to use P2P communication" "all visible devices, set this to empty string. NOTE: the "
"between XPU devices, use XPU_VISIBLE_DEVICES can only use" "reason of doing this is that we want to use P2P communication"
"share-memory only."); "between XPU devices, use XPU_VISIBLE_DEVICES can only use"
"share-memory only.");
namespace paddle { namespace paddle {
namespace platform { namespace platform {
......
...@@ -29,82 +29,8 @@ ...@@ -29,82 +29,8 @@
#include "paddle/fluid/platform/macros.h" #include "paddle/fluid/platform/macros.h"
#include "pybind11/stl.h" #include "pybind11/stl.h"
// data processing // NOTE: where are these 2 flags from?
DECLARE_bool(use_mkldnn);
DECLARE_string(tracer_mkldnn_ops_on);
DECLARE_string(tracer_mkldnn_ops_off);
// debug
DECLARE_bool(check_nan_inf);
DECLARE_bool(cpu_deterministic);
DECLARE_bool(enable_rpc_profiler);
DECLARE_int32(multiple_of_cupti_buffer_size);
DECLARE_bool(reader_queue_speed_test_mode);
DECLARE_int32(call_stack_level);
DECLARE_bool(sort_sum_gradient);
DECLARE_bool(check_kernel_launch);
// device management
DECLARE_int32(paddle_num_threads);
// executor
DECLARE_bool(enable_parallel_graph);
DECLARE_string(pe_profile_fname);
DECLARE_string(print_sub_graph_dir);
DECLARE_bool(use_ngraph);
// memory management
DECLARE_string(allocator_strategy);
DECLARE_double(eager_delete_tensor_gb);
DECLARE_double(fraction_of_cpu_memory_to_use);
DECLARE_bool(free_idle_chunk);
DECLARE_bool(free_when_no_cache_hit);
DECLARE_int32(fuse_parameter_groups_size);
DECLARE_double(fuse_parameter_memory_size);
DECLARE_bool(init_allocated_mem);
DECLARE_uint64(initial_cpu_memory_in_mb);
DECLARE_double(memory_fraction_of_eager_deletion);
DECLARE_bool(use_pinned_memory);
DECLARE_bool(use_system_allocator);
// others
DECLARE_bool(benchmark);
DECLARE_int32(inner_op_parallelism);
DECLARE_int32(max_inplace_grad_add);
DECLARE_string(tracer_profile_fname);
DECLARE_bool(apply_pass_to_program);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
// cudnn
DECLARE_uint64(conv_workspace_size_limit);
DECLARE_bool(cudnn_batchnorm_spatial_persistent);
DECLARE_bool(cudnn_deterministic);
DECLARE_bool(cudnn_exhaustive_search);
DECLARE_bool(conv2d_disable_cudnn);
// data processing
DECLARE_bool(enable_cublas_tensor_op_math);
// device management
DECLARE_string(selected_gpus);
// memory management
DECLARE_bool(eager_delete_scope);
DECLARE_bool(fast_eager_deletion_mode);
DECLARE_double(fraction_of_cuda_pinned_memory_to_use);
DECLARE_double(fraction_of_gpu_memory_to_use);
DECLARE_uint64(gpu_memory_limit_mb);
DECLARE_uint64(initial_gpu_memory_in_mb);
DECLARE_uint64(reallocate_gpu_memory_in_mb);
// others
DECLARE_bool(sync_nccl_allreduce);
#endif
#ifdef PADDLE_WITH_XPU
// device management
DECLARE_string(selected_xpus);
#endif
#ifdef PADDLE_WITH_ASCEND_CL
// device management
DECLARE_string(selected_npus);
// set minmum loss scaling value
DECLARE_int32(min_loss_scaling);
#endif
#ifdef PADDLE_WITH_DISTRIBUTE #ifdef PADDLE_WITH_DISTRIBUTE
DECLARE_int32(rpc_send_thread_num);
DECLARE_int32(rpc_get_thread_num); DECLARE_int32(rpc_get_thread_num);
DECLARE_int32(rpc_prefetch_thread_num); DECLARE_int32(rpc_prefetch_thread_num);
#endif #endif
...@@ -181,7 +107,6 @@ class PYBIND11_HIDDEN GlobalVarGetterSetterRegistry { ...@@ -181,7 +107,6 @@ class PYBIND11_HIDDEN GlobalVarGetterSetterRegistry {
PADDLE_ENFORCE_NOT_NULL(setter, PADDLE_ENFORCE_NOT_NULL(setter,
platform::errors::InvalidArgument( platform::errors::InvalidArgument(
"Setter of %s should not be null", name)); "Setter of %s should not be null", name));
var_infos_.insert({name, VarInfo(is_public, getter, setter)}); var_infos_.insert({name, VarInfo(is_public, getter, setter)});
} }
...@@ -243,81 +168,6 @@ class PYBIND11_HIDDEN GlobalVarGetterSetterRegistry { ...@@ -243,81 +168,6 @@ class PYBIND11_HIDDEN GlobalVarGetterSetterRegistry {
GlobalVarGetterSetterRegistry GlobalVarGetterSetterRegistry::instance_; GlobalVarGetterSetterRegistry GlobalVarGetterSetterRegistry::instance_;
class GlobalVarGetterSetterRegistryHelper {
public:
GlobalVarGetterSetterRegistryHelper(bool is_public, bool is_writable,
const std::string &var_names)
: is_public_(is_public),
is_writable_(is_writable),
var_names_(SplitVarNames(var_names)) {}
template <typename... Args>
void Register(Args &&... args) const {
Impl<0, sizeof...(args) == 1, Args...>::Register(
is_public_, is_writable_, var_names_, std::forward<Args>(args)...);
}
private:
static std::vector<std::string> SplitVarNames(const std::string &names) {
auto valid_char = [](char ch) { return !std::isspace(ch) && ch != ','; };
std::vector<std::string> ret;
size_t i = 0, j = 0, n = names.size();
while (i < n) {
for (; i < n && !valid_char(names[i]); ++i) {
}
for (j = i + 1; j < n && valid_char(names[j]); ++j) {
}
if (i < n && j <= n) {
auto substring = names.substr(i, j - i);
VLOG(10) << "Get substring: \"" << substring << "\"";
ret.emplace_back(substring);
}
i = j + 1;
}
return ret;
}
private:
template <size_t kIdx, bool kIsStop, typename T, typename... Args>
struct Impl {
static void Register(bool is_public, bool is_writable,
const std::vector<std::string> &var_names, T &&var,
Args &&... args) {
PADDLE_ENFORCE_EQ(kIdx + 1 + sizeof...(args), var_names.size(),
platform::errors::InvalidArgument(
"Argument number not match name number"));
Impl<kIdx, true, T>::Register(is_public, is_writable, var_names, var);
Impl<kIdx + 1, sizeof...(Args) == 1, Args...>::Register(
is_public, is_writable, var_names, std::forward<Args>(args)...);
}
};
template <size_t kIdx, typename T>
struct Impl<kIdx, true, T> {
static void Register(bool is_public, bool is_writable,
const std::vector<std::string> &var_names, T &&var) {
auto *instance = GlobalVarGetterSetterRegistry::MutableInstance();
if (is_writable) {
instance->Register(
var_names[kIdx], is_public,
GlobalVarGetterSetterRegistry::CreateGetter(std::forward<T>(var)),
GlobalVarGetterSetterRegistry::CreateSetter(&var));
} else {
instance->Register(
var_names[kIdx], is_public,
GlobalVarGetterSetterRegistry::CreateGetter(std::forward<T>(var)));
}
}
};
private:
const bool is_public_;
const bool is_writable_;
const std::vector<std::string> var_names_;
};
static void RegisterGlobalVarGetterSetter(); static void RegisterGlobalVarGetterSetter();
void BindGlobalValueGetterSetter(pybind11::module *module) { void BindGlobalValueGetterSetter(pybind11::module *module) {
...@@ -338,65 +188,51 @@ void BindGlobalValueGetterSetter(pybind11::module *module) { ...@@ -338,65 +188,51 @@ void BindGlobalValueGetterSetter(pybind11::module *module) {
} }
/* Public vars are designed to be writable. */ /* Public vars are designed to be writable. */
#define REGISTER_PUBLIC_GLOBAL_VAR(...) \ #define REGISTER_PUBLIC_GLOBAL_VAR(var) \
do { \ do { \
GlobalVarGetterSetterRegistryHelper(/*is_public=*/true, \ auto *instance = GlobalVarGetterSetterRegistry::MutableInstance(); \
/*is_writable=*/true, "" #__VA_ARGS__) \ instance->Register(#var, /*is_public=*/true, \
.Register(__VA_ARGS__); \ GlobalVarGetterSetterRegistry::CreateGetter(var), \
GlobalVarGetterSetterRegistry::CreateSetter(&var)); \
} while (0) } while (0)
#define REGISTER_PRIVATE_GLOBAL_VAR(is_writable, ...) \ struct RegisterGetterSetterVisitor : public boost::static_visitor<void> {
do { \ RegisterGetterSetterVisitor(const std::string &name, bool is_public,
GlobalVarGetterSetterRegistryHelper(/*is_public=*/false, is_writable, \ void *value_ptr)
"" #__VA_ARGS__) \ : name_(name), value_ptr_(value_ptr) {}
.Register(__VA_ARGS__); \
} while (0)
static void RegisterGlobalVarGetterSetter() { template <typename T>
REGISTER_PRIVATE_GLOBAL_VAR(/*is_writable=*/false, FLAGS_free_idle_chunk, void operator()(const T &) const {
FLAGS_free_when_no_cache_hit); auto &value = *static_cast<T *>(value_ptr_);
auto *instance = GlobalVarGetterSetterRegistry::MutableInstance();
REGISTER_PUBLIC_GLOBAL_VAR( instance->Register(name_, is_public_,
FLAGS_eager_delete_tensor_gb, FLAGS_enable_parallel_graph, GlobalVarGetterSetterRegistry::CreateGetter(value),
FLAGS_allocator_strategy, FLAGS_use_system_allocator, FLAGS_check_nan_inf, GlobalVarGetterSetterRegistry::CreateSetter(&value));
FLAGS_call_stack_level, FLAGS_sort_sum_gradient, FLAGS_cpu_deterministic, }
FLAGS_enable_rpc_profiler, FLAGS_multiple_of_cupti_buffer_size,
FLAGS_reader_queue_speed_test_mode, FLAGS_pe_profile_fname,
FLAGS_print_sub_graph_dir, FLAGS_fraction_of_cpu_memory_to_use,
FLAGS_fuse_parameter_groups_size, FLAGS_fuse_parameter_memory_size,
FLAGS_init_allocated_mem, FLAGS_initial_cpu_memory_in_mb,
FLAGS_memory_fraction_of_eager_deletion, FLAGS_use_pinned_memory,
FLAGS_benchmark, FLAGS_inner_op_parallelism, FLAGS_tracer_profile_fname,
FLAGS_paddle_num_threads, FLAGS_use_mkldnn, FLAGS_max_inplace_grad_add,
FLAGS_tracer_mkldnn_ops_on, FLAGS_tracer_mkldnn_ops_off,
FLAGS_apply_pass_to_program);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
REGISTER_PUBLIC_GLOBAL_VAR(
FLAGS_gpu_memory_limit_mb, FLAGS_cudnn_deterministic,
FLAGS_conv_workspace_size_limit, FLAGS_cudnn_batchnorm_spatial_persistent,
FLAGS_cudnn_exhaustive_search, FLAGS_eager_delete_scope,
FLAGS_fast_eager_deletion_mode,
FLAGS_fraction_of_cuda_pinned_memory_to_use,
FLAGS_fraction_of_gpu_memory_to_use, FLAGS_initial_gpu_memory_in_mb,
FLAGS_reallocate_gpu_memory_in_mb, FLAGS_enable_cublas_tensor_op_math,
FLAGS_selected_gpus, FLAGS_sync_nccl_allreduce,
FLAGS_conv2d_disable_cudnn, FLAGS_check_kernel_launch);
#endif
#ifdef PADDLE_WITH_XPU
REGISTER_PUBLIC_GLOBAL_VAR(FLAGS_selected_xpus);
#endif
#ifdef PADDLE_WITH_ASCEND_CL private:
REGISTER_PUBLIC_GLOBAL_VAR(FLAGS_selected_npus); std::string name_;
REGISTER_PUBLIC_GLOBAL_VAR(FLAGS_min_loss_scaling); bool is_public_;
#endif void *value_ptr_;
};
static void RegisterGlobalVarGetterSetter() {
#ifdef PADDLE_WITH_DITRIBUTE #ifdef PADDLE_WITH_DITRIBUTE
REGISTER_PUBLIC_GLOBAL_VAR(FLAGS_rpc_send_thread_num, REGISTER_PUBLIC_GLOBAL_VAR(FLAGS_rpc_get_thread_num);
FLAGS_rpc_get_thread_num, REGISTER_PUBLIC_GLOBAL_VAR(FLAGS_rpc_prefetch_thread_num);
FLAGS_rpc_prefetch_thread_num);
#endif #endif
const auto &flag_map = platform::GetExportedFlagInfoMap();
for (const auto &pair : flag_map) {
const std::string &name = pair.second.name;
bool is_writable = pair.second.is_writable;
void *value_ptr = const_cast<void *>(pair.second.value_ptr);
const auto &default_value = pair.second.default_value;
RegisterGetterSetterVisitor visitor("FLAGS_" + name, is_writable,
value_ptr);
boost::apply_visitor(visitor, default_value);
}
} }
} // namespace pybind } // namespace pybind
} // namespace paddle } // namespace paddle
...@@ -32,9 +32,10 @@ ...@@ -32,9 +32,10 @@
#include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/place.h"
#include "pybind11/stl.h" #include "pybind11/stl.h"
DEFINE_bool(reader_queue_speed_test_mode, false, PADDLE_DEFINE_EXPORTED_bool(
"If set true, the queue.pop will only get data from queue but not " reader_queue_speed_test_mode, false,
"remove the data from queue for speed testing"); "If set true, the queue.pop will only get data from queue but not "
"remove the data from queue for speed testing");
namespace paddle { namespace paddle {
namespace pybind { namespace pybind {
......
...@@ -176,83 +176,23 @@ def __bootstrap__(): ...@@ -176,83 +176,23 @@ def __bootstrap__():
print('PLEASE USE OMP_NUM_THREADS WISELY.', file=sys.stderr) print('PLEASE USE OMP_NUM_THREADS WISELY.', file=sys.stderr)
os.environ['OMP_NUM_THREADS'] = str(num_threads) os.environ['OMP_NUM_THREADS'] = str(num_threads)
sysstr = platform.system()
flag_prefix = 'FLAGS_'
read_env_flags = [ read_env_flags = [
'check_nan_inf', key[len(flag_prefix):] for key in core.globals().keys()
'convert_all_blocks', if key.startswith(flag_prefix)
'benchmark',
'eager_delete_scope',
'fraction_of_cpu_memory_to_use',
'initial_cpu_memory_in_mb',
'init_allocated_mem',
'paddle_num_threads',
'dist_threadpool_size',
'eager_delete_tensor_gb',
'fast_eager_deletion_mode',
'memory_fraction_of_eager_deletion',
'allocator_strategy',
'reader_queue_speed_test_mode',
'print_sub_graph_dir',
'pe_profile_fname',
'inner_op_parallelism',
'enable_parallel_graph',
'fuse_parameter_groups_size',
'multiple_of_cupti_buffer_size',
'fuse_parameter_memory_size',
'tracer_profile_fname',
'dygraph_debug',
'use_system_allocator',
'enable_unused_var_check',
'free_idle_chunk',
'free_when_no_cache_hit',
'call_stack_level',
'sort_sum_gradient',
'max_inplace_grad_add',
'apply_pass_to_program',
'new_executor_use_inplace',
] ]
if 'Darwin' not in sysstr:
read_env_flags.append('use_pinned_memory')
if os.name != 'nt': def remove_flag_if_exists(name):
read_env_flags.append('cpu_deterministic') if name in read_env_flags:
read_env_flags.remove(name)
if core.is_compiled_with_mkldnn(): sysstr = platform.system()
read_env_flags.append('use_mkldnn') if 'Darwin' in sysstr:
read_env_flags.append('tracer_mkldnn_ops_on') remove_flags_if_exists('use_pinned_memory')
read_env_flags.append('tracer_mkldnn_ops_off')
if core.is_compiled_with_cuda():
read_env_flags += [
'fraction_of_gpu_memory_to_use',
'initial_gpu_memory_in_mb',
'reallocate_gpu_memory_in_mb',
'cudnn_deterministic',
'enable_cublas_tensor_op_math',
'conv_workspace_size_limit',
'cudnn_exhaustive_search',
'selected_gpus',
'sync_nccl_allreduce',
'cudnn_batchnorm_spatial_persistent',
'gpu_allocator_retry_time',
'local_exe_sub_scope_limit',
'gpu_memory_limit_mb',
'conv2d_disable_cudnn',
'get_host_by_name_time',
]
if core.is_compiled_with_npu(): if os.name == 'nt':
read_env_flags += [ remove_flags_if_exists('cpu_deterministic')
'selected_npus',
'fraction_of_gpu_memory_to_use',
'initial_gpu_memory_in_mb',
'reallocate_gpu_memory_in_mb',
'gpu_memory_limit_mb',
'npu_config_path',
'get_host_by_name_time',
'hccl_check_nan',
'min_loss_scaling',
]
core.init_gflags(["--tryfromenv=" + ",".join(read_env_flags)]) core.init_gflags(["--tryfromenv=" + ",".join(read_env_flags)])
# Note(zhouwei25): sys may not have argv in some cases, # Note(zhouwei25): sys may not have argv in some cases,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册