提交 ca0136a6 编写于 作者: S sneaxiy

make flag setter easier

上级 e93c18a3
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
#include "paddle/fluid/framework/details/variable_visitor.h" #include "paddle/fluid/framework/details/variable_visitor.h"
#include "paddle/fluid/platform/profiler.h" #include "paddle/fluid/platform/profiler.h"
DEFINE_bool( PADDLE_DEFINE_EXPORTED_bool(
cpu_deterministic, false, cpu_deterministic, false,
"Whether to make the result of computation deterministic in CPU side."); "Whether to make the result of computation deterministic in CPU side.");
......
...@@ -25,13 +25,14 @@ class VarDesc; ...@@ -25,13 +25,14 @@ class VarDesc;
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
DEFINE_double(fuse_parameter_memory_size, -1.0, // MBytes PADDLE_DEFINE_EXPORTED_double(
"fuse_parameter_memory_size is up limited memory size(MB)" fuse_parameter_memory_size, -1.0, // MBytes
"of one group parameters' gradient which is the input " "fuse_parameter_memory_size is up limited memory size(MB)"
"of communication calling(e.g NCCLAllReduce). " "of one group parameters' gradient which is the input "
"The default value is 0, it means that " "of communication calling(e.g NCCLAllReduce). "
"not set group according to memory_size."); "The default value is 0, it means that "
DEFINE_int32( "not set group according to memory_size.");
PADDLE_DEFINE_EXPORTED_int32(
fuse_parameter_groups_size, 1, fuse_parameter_groups_size, 1,
"fuse_parameter_groups_size is the up limited size of one group " "fuse_parameter_groups_size is the up limited size of one group "
"parameters' gradient. " "parameters' gradient. "
......
...@@ -17,8 +17,8 @@ limitations under the License. */ ...@@ -17,8 +17,8 @@ limitations under the License. */
#include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/operator.h"
DEFINE_bool(convert_all_blocks, true, PADDLE_DEFINE_EXPORTED_bool(convert_all_blocks, true,
"Convert all blocks in program into SSAgraphs"); "Convert all blocks in program into SSAgraphs");
namespace paddle { namespace paddle {
namespace framework { namespace framework {
......
...@@ -18,9 +18,9 @@ limitations under the License. */ ...@@ -18,9 +18,9 @@ limitations under the License. */
#include "paddle/fluid/framework/op_proto_maker.h" #include "paddle/fluid/framework/op_proto_maker.h"
DECLARE_bool(convert_all_blocks); DECLARE_bool(convert_all_blocks);
DEFINE_string(print_sub_graph_dir, "", PADDLE_DEFINE_EXPORTED_string(print_sub_graph_dir, "",
"FLAGS_print_sub_graph_dir is used " "FLAGS_print_sub_graph_dir is used "
"to print the nodes of sub_graphs."); "to print the nodes of sub_graphs.");
namespace paddle { namespace paddle {
namespace framework { namespace framework {
......
...@@ -47,7 +47,8 @@ class LoDTensor; ...@@ -47,7 +47,8 @@ class LoDTensor;
DECLARE_bool(benchmark); DECLARE_bool(benchmark);
DECLARE_bool(check_nan_inf); DECLARE_bool(check_nan_inf);
DECLARE_bool(enable_unused_var_check); DECLARE_bool(enable_unused_var_check);
DEFINE_int32(inner_op_parallelism, 0, "number of threads for inner op"); PADDLE_DEFINE_EXPORTED_int32(inner_op_parallelism, 0,
"number of threads for inner op");
namespace paddle { namespace paddle {
namespace framework { namespace framework {
......
...@@ -46,11 +46,13 @@ DECLARE_double(eager_delete_tensor_gb); ...@@ -46,11 +46,13 @@ DECLARE_double(eager_delete_tensor_gb);
#ifdef WITH_GPERFTOOLS #ifdef WITH_GPERFTOOLS
#include "gperftools/profiler.h" #include "gperftools/profiler.h"
#endif #endif
DEFINE_string(pe_profile_fname, "", PADDLE_DEFINE_EXPORTED_string(
"Profiler filename for PE, which generated by gperftools." pe_profile_fname, "",
"Only valid when compiled `WITH_PRIFILER=ON`. Empty if disable."); "Profiler filename for PE, which generated by gperftools."
DEFINE_bool(enable_parallel_graph, false, "Only valid when compiled `WITH_PRIFILER=ON`. Empty if disable.");
"Force disable parallel graph execution mode if set false."); PADDLE_DEFINE_EXPORTED_bool(
enable_parallel_graph, false,
"Force disable parallel graph execution mode if set false.");
namespace paddle { namespace paddle {
namespace framework { namespace framework {
......
...@@ -19,7 +19,7 @@ limitations under the License. */ ...@@ -19,7 +19,7 @@ limitations under the License. */
DECLARE_bool(benchmark); DECLARE_bool(benchmark);
DEFINE_bool( PADDLE_DEFINE_EXPORTED_bool(
eager_delete_scope, true, eager_delete_scope, true,
"Delete local scope eagerly. It will reduce GPU memory usage but " "Delete local scope eagerly. It will reduce GPU memory usage but "
"slow down the destruction of variables.(around 1% performance harm)"); "slow down the destruction of variables.(around 1% performance harm)");
......
...@@ -17,15 +17,16 @@ limitations under the License. */ ...@@ -17,15 +17,16 @@ limitations under the License. */
#include <glog/logging.h> #include <glog/logging.h>
#include <string> #include <string>
#include "gflags/gflags.h"
#include "paddle/fluid/framework/no_need_buffer_vars_inference.h" #include "paddle/fluid/framework/no_need_buffer_vars_inference.h"
#include "paddle/fluid/framework/op_info.h" #include "paddle/fluid/framework/op_info.h"
#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/flags.h"
DEFINE_bool(enable_unused_var_check, false, PADDLE_DEFINE_EXPORTED_bool(
"Checking whether operator contains unused inputs, " enable_unused_var_check, false,
"especially for grad operator. It should be in unittest."); "Checking whether operator contains unused inputs, "
"especially for grad operator. It should be in unittest.");
namespace paddle { namespace paddle {
namespace framework { namespace framework {
......
...@@ -11,7 +11,7 @@ cc_library(amp SRCS amp_auto_cast.cc DEPS layer ) ...@@ -11,7 +11,7 @@ cc_library(amp SRCS amp_auto_cast.cc DEPS layer )
cc_library(tracer SRCS tracer.cc DEPS layer engine program_desc_tracer amp denormal) cc_library(tracer SRCS tracer.cc DEPS layer engine program_desc_tracer amp denormal)
cc_library(basic_engine SRCS basic_engine.cc DEPS layer gradient_accumulator) cc_library(basic_engine SRCS basic_engine.cc DEPS layer gradient_accumulator)
cc_library(engine SRCS basic_engine.cc partial_grad_engine.cc DEPS layer gradient_accumulator) cc_library(engine SRCS basic_engine.cc partial_grad_engine.cc DEPS layer gradient_accumulator)
cc_library(imperative_profiler SRCS profiler.cc) cc_library(imperative_profiler SRCS profiler.cc DEPS flags)
if(NOT WIN32) if(NOT WIN32)
if(WITH_NCCL OR WITH_RCCL) if(WITH_NCCL OR WITH_RCCL)
cc_library(imperative_all_reduce SRCS all_reduce.cc DEPS collective_helper device_context selected_rows tensor) cc_library(imperative_all_reduce SRCS all_reduce.cc DEPS collective_helper device_context selected_rows tensor)
......
...@@ -13,11 +13,11 @@ ...@@ -13,11 +13,11 @@
// limitations under the License. // limitations under the License.
#include "paddle/fluid/imperative/flags.h" #include "paddle/fluid/imperative/flags.h"
#include "gflags/gflags.h" #include "paddle/fluid/platform/flags.h"
DEFINE_uint64(dygraph_debug, 0, PADDLE_DEFINE_EXPORTED_uint64(dygraph_debug, 0,
"Debug level of dygraph. This flag is not " "Debug level of dygraph. This flag is not "
"open to users"); "open to users");
namespace paddle { namespace paddle {
namespace imperative { namespace imperative {
......
...@@ -19,9 +19,9 @@ ...@@ -19,9 +19,9 @@
#endif #endif
#include <glog/logging.h> #include <glog/logging.h>
#include <mutex> // NOLINT #include <mutex> // NOLINT
#include "gflags/gflags.h" #include "paddle/fluid/platform/flags.h"
DEFINE_string( PADDLE_DEFINE_EXPORTED_string(
tracer_profile_fname, "xxgperf", tracer_profile_fname, "xxgperf",
"Profiler filename for imperative tracer, which generated by gperftools." "Profiler filename for imperative tracer, which generated by gperftools."
"Only valid when compiled `WITH_PROFILER=ON`. Empty if disable."); "Only valid when compiled `WITH_PROFILER=ON`. Empty if disable.");
......
...@@ -99,7 +99,7 @@ cc_test(allocator_facade_abs_flags_test SRCS allocator_facade_abs_flags_test.cc ...@@ -99,7 +99,7 @@ cc_test(allocator_facade_abs_flags_test SRCS allocator_facade_abs_flags_test.cc
cc_test(allocator_facade_frac_flags_test SRCS allocator_facade_frac_flags_test.cc DEPS allocator_facade) cc_test(allocator_facade_frac_flags_test SRCS allocator_facade_frac_flags_test.cc DEPS allocator_facade)
cc_library(auto_growth_best_fit_allocator SRCS auto_growth_best_fit_allocator.cc DEPS allocator aligned_allocator) cc_library(auto_growth_best_fit_allocator SRCS auto_growth_best_fit_allocator.cc DEPS allocator aligned_allocator flags)
cc_test(auto_growth_best_fit_allocator_facade_test SRCS auto_growth_best_fit_allocator_facade_test.cc DEPS cpu_allocator auto_growth_best_fit_allocator) cc_test(auto_growth_best_fit_allocator_facade_test SRCS auto_growth_best_fit_allocator_facade_test.cc DEPS cpu_allocator auto_growth_best_fit_allocator)
cc_test(auto_growth_best_fit_allocator_test SRCS auto_growth_best_fit_allocator_test.cc DEPS auto_growth_best_fit_allocator) cc_test(auto_growth_best_fit_allocator_test SRCS auto_growth_best_fit_allocator_test.cc DEPS auto_growth_best_fit_allocator)
......
...@@ -37,14 +37,15 @@ ...@@ -37,14 +37,15 @@
#endif #endif
#include "paddle/fluid/platform/npu_info.h" #include "paddle/fluid/platform/npu_info.h"
DEFINE_int64( PADDLE_DEFINE_EXPORTED_int64(
gpu_allocator_retry_time, 10000, gpu_allocator_retry_time, 10000,
"The retry time (milliseconds) when allocator fails " "The retry time (milliseconds) when allocator fails "
"to allocate memory. No retry if this value is not greater than 0"); "to allocate memory. No retry if this value is not greater than 0");
DEFINE_bool(use_system_allocator, false, PADDLE_DEFINE_EXPORTED_bool(
"Whether to use system allocator to allocate CPU and GPU memory. " use_system_allocator, false,
"Only used for unittests."); "Whether to use system allocator to allocate CPU and GPU memory. "
"Only used for unittests.");
namespace paddle { namespace paddle {
namespace memory { namespace memory {
......
...@@ -17,18 +17,21 @@ ...@@ -17,18 +17,21 @@
#include <algorithm> #include <algorithm>
#include <mutex> // NOLINT #include <mutex> // NOLINT
#include "paddle/fluid/memory/allocation/aligned_allocator.h" #include "paddle/fluid/memory/allocation/aligned_allocator.h"
#include "paddle/fluid/platform/flags.h"
DEFINE_bool(free_idle_chunk, false,
"Whether to free idle chunk when each allocation is freed. " PADDLE_DEFINE_READONLY_EXPORTED_bool(
"If false, all freed allocation would be cached to speed up next " free_idle_chunk, false,
"allocation request. If true, no allocation would be cached. This " "Whether to free idle chunk when each allocation is freed. "
"flag only works when FLAGS_allocator_strategy=auto_growth."); "If false, all freed allocation would be cached to speed up next "
"allocation request. If true, no allocation would be cached. This "
DEFINE_bool(free_when_no_cache_hit, false, "flag only works when FLAGS_allocator_strategy=auto_growth.");
"Whether to free idle chunks when no cache hit. If true, idle "
"chunk would be freed when no cache hit; if false, idle " PADDLE_DEFINE_READONLY_EXPORTED_bool(
"chunk would be freed when out of memory occurs. This flag " free_when_no_cache_hit, false,
"only works when FLAGS_allocator_strategy=auto_growth."); "Whether to free idle chunks when no cache hit. If true, idle "
"chunk would be freed when no cache hit; if false, idle "
"chunk would be freed when out of memory occurs. This flag "
"only works when FLAGS_allocator_strategy=auto_growth.");
namespace paddle { namespace paddle {
namespace memory { namespace memory {
......
...@@ -34,12 +34,13 @@ ...@@ -34,12 +34,13 @@
#include "paddle/fluid/platform/xpu/xpu_header.h" #include "paddle/fluid/platform/xpu/xpu_header.h"
#endif #endif
DEFINE_bool(init_allocated_mem, false, PADDLE_DEFINE_EXPORTED_bool(
"It is a mistake that the values of the memory allocated by " init_allocated_mem, false,
"BuddyAllocator are always zeroed in some op's implementation. " "It is a mistake that the values of the memory allocated by "
"To find this error in time, we use init_allocated_mem to indicate " "BuddyAllocator are always zeroed in some op's implementation. "
"that initializing the allocated memory with a small value " "To find this error in time, we use init_allocated_mem to indicate "
"during unit testing."); "that initializing the allocated memory with a small value "
"during unit testing.");
DECLARE_double(fraction_of_gpu_memory_to_use); DECLARE_double(fraction_of_gpu_memory_to_use);
DECLARE_uint64(initial_gpu_memory_in_mb); DECLARE_uint64(initial_gpu_memory_in_mb);
DECLARE_uint64(reallocate_gpu_memory_in_mb); DECLARE_uint64(reallocate_gpu_memory_in_mb);
......
...@@ -15,7 +15,8 @@ limitations under the License. */ ...@@ -15,7 +15,8 @@ limitations under the License. */
#include "paddle/fluid/operators/pscore/heter_listen_and_serv_op.h" #include "paddle/fluid/operators/pscore/heter_listen_and_serv_op.h"
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
DEFINE_int32(rpc_send_thread_num, 12, "number of threads for rpc send"); PADDLE_DEFINE_EXPORTED_int32(rpc_send_thread_num, 12,
"number of threads for rpc send");
namespace paddle { namespace paddle {
namespace operators { namespace operators {
......
...@@ -37,13 +37,13 @@ if (WITH_PYTHON) ...@@ -37,13 +37,13 @@ if (WITH_PYTHON)
endif(NOT WIN32) endif(NOT WIN32)
endif() endif()
cc_library(flags SRCS flags.cc DEPS gflags) cc_library(flags SRCS flags.cc DEPS gflags boost)
cc_library(denormal SRCS denormal.cc DEPS) cc_library(denormal SRCS denormal.cc DEPS)
cc_library(errors SRCS errors.cc DEPS error_codes_proto) cc_library(errors SRCS errors.cc DEPS error_codes_proto)
cc_test(errors_test SRCS errors_test.cc DEPS errors enforce) cc_test(errors_test SRCS errors_test.cc DEPS errors enforce)
set(enforce_deps flags errors boost) set(enforce_deps flags errors boost flags)
if(WITH_GPU) if(WITH_GPU)
set(enforce_deps ${enforce_deps} external_error_proto) set(enforce_deps ${enforce_deps} external_error_proto)
endif() endif()
......
...@@ -31,7 +31,7 @@ limitations under the License. */ ...@@ -31,7 +31,7 @@ limitations under the License. */
#endif // _WIN32 #endif // _WIN32
#include <algorithm> #include <algorithm>
#include "gflags/gflags.h" #include "paddle/fluid/platform/flags.h"
DECLARE_double(fraction_of_cpu_memory_to_use); DECLARE_double(fraction_of_cpu_memory_to_use);
DECLARE_uint64(initial_cpu_memory_in_mb); DECLARE_uint64(initial_cpu_memory_in_mb);
...@@ -42,7 +42,8 @@ DECLARE_double(fraction_of_cuda_pinned_memory_to_use); ...@@ -42,7 +42,8 @@ DECLARE_double(fraction_of_cuda_pinned_memory_to_use);
// between host and device. Allocates too much would reduce the amount // between host and device. Allocates too much would reduce the amount
// of memory available to the system for paging. So, by default, we // of memory available to the system for paging. So, by default, we
// should set false to use_pinned_memory. // should set false to use_pinned_memory.
DEFINE_bool(use_pinned_memory, true, "If set, allocate cpu pinned memory."); PADDLE_DEFINE_EXPORTED_bool(use_pinned_memory, true,
"If set, allocate cpu pinned memory.");
namespace paddle { namespace paddle {
namespace platform { namespace platform {
...@@ -54,7 +55,9 @@ size_t CpuTotalPhysicalMemory() { ...@@ -54,7 +55,9 @@ size_t CpuTotalPhysicalMemory() {
mib[1] = HW_MEMSIZE; mib[1] = HW_MEMSIZE;
int64_t size = 0; int64_t size = 0;
size_t len = sizeof(size); size_t len = sizeof(size);
if (sysctl(mib, 2, &size, &len, NULL, 0) == 0) return (size_t)size; if (sysctl(mib, 2, &size, &len, NULL, 0) == 0) {
return static_cast<size_t>(size);
}
return 0L; return 0L;
#elif defined(_WIN32) #elif defined(_WIN32)
MEMORYSTATUSEX sMeminfo; MEMORYSTATUSEX sMeminfo;
......
...@@ -101,6 +101,7 @@ limitations under the License. */ ...@@ -101,6 +101,7 @@ limitations under the License. */
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#include "paddle/fluid/platform/type_defs.h" #include "paddle/fluid/platform/type_defs.h"
#endif #endif
#include "paddle/fluid/platform/flags.h"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
......
此差异已折叠。
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <cstdint>
#include <map>
#include <string>
#include <type_traits>
#include <typeindex>
#include "boost/variant.hpp"
#include "gflags/gflags.h"
#include "paddle/fluid/platform/macros.h"
namespace paddle {
namespace platform {
// Metadata describing one exported gflag. An entry is recorded during static
// initialization by __PADDLE_DEFINE_EXPORTED_FLAG (below) so that the flag
// can be discovered and read/written at runtime through
// GetExportedFlagInfoMap() (e.g. by the pybind getter/setter registry).
struct FlagInfo {
  // An exported flag's value is one of the gflags-supported types below.
  using ValueType =
      boost::variant<bool, int32_t, int64_t, uint64_t, double, std::string>;
  std::string name;         // flag name without the "FLAGS_" prefix
  void *value_ptr;          // pointer to the live FLAGS_<name> variable
  ValueType default_value;  // default supplied at the definition site
  std::string doc;          // help string supplied at the definition site
  bool is_writable;         // false for PADDLE_DEFINE_READONLY_* flags
};

// Maps flag name -> registration info for every exported flag.
using ExportedFlagInfoMap = std::map<std::string, FlagInfo>;
// Returns the global registry of exported flags, populated as each
// PADDLE_DEFINE_EXPORTED_* definition's static registrar runs.
const ExportedFlagInfoMap &GetExportedFlagInfoMap();
// Defines a gflag FLAGS_<__name> of gflags type __gflag_type and, via a
// file-local static registrar object, records it in GetExportedFlagInfoMap()
// during static initialization. __is_writable is stored verbatim in
// FlagInfo::is_writable; the map entry always keeps a non-const pointer to
// the flag variable. The static_assert compares the registrar type against
// its ::-qualified spelling so the macro can only be used at global
// namespace scope (where the DEFINE_* it expands to must live anyway).
// NOTE: no comments inside the macro body — '\' line splicing happens
// before comment removal, so a '//' line would swallow its continuation.
#define __PADDLE_DEFINE_EXPORTED_FLAG(__name, __is_writable, __cpp_type,   \
                                      __gflag_type, __default_value, __doc) \
  DEFINE_##__gflag_type(__name, __default_value, __doc);                   \
  struct __PaddleRegisterFlag_##__name {                                   \
    __PaddleRegisterFlag_##__name() {                                      \
      const auto &instance = ::paddle::platform::GetExportedFlagInfoMap(); \
      using Type = ::paddle::platform::ExportedFlagInfoMap;                \
      auto &info = const_cast<Type &>(instance)[#__name];                  \
      info.name = #__name;                                                 \
      info.value_ptr = &(FLAGS_##__name);                                  \
      info.default_value = static_cast<__cpp_type>(__default_value);       \
      info.doc = __doc;                                                    \
      info.is_writable = __is_writable;                                    \
    }                                                                      \
  };                                                                       \
  static_assert(std::is_same<__PaddleRegisterFlag_##__name,                \
                             ::__PaddleRegisterFlag_##__name>::value,      \
                "FLAGS should define in global namespace");                \
  static __PaddleRegisterFlag_##__name __PaddleRegisterFlag_instance##__name
// Public per-type wrappers around __PADDLE_DEFINE_EXPORTED_FLAG.
// Each defines FLAGS_<name> and registers it as runtime-discoverable.
// The plain variants register the flag as writable; the READONLY bool
// variant registers it with is_writable == false.
#define PADDLE_DEFINE_EXPORTED_bool(name, default_value, doc) \
  __PADDLE_DEFINE_EXPORTED_FLAG(name, true, bool, bool, default_value, doc)
#define PADDLE_DEFINE_READONLY_EXPORTED_bool(name, default_value, doc) \
  __PADDLE_DEFINE_EXPORTED_FLAG(name, false, bool, bool, default_value, doc)
#define PADDLE_DEFINE_EXPORTED_int32(name, default_value, doc) \
  __PADDLE_DEFINE_EXPORTED_FLAG(name, true, int32_t, int32, default_value, doc)
#define PADDLE_DEFINE_EXPORTED_int64(name, default_value, doc) \
  __PADDLE_DEFINE_EXPORTED_FLAG(name, true, int64_t, int64, default_value, doc)
#define PADDLE_DEFINE_EXPORTED_uint64(name, default_value, doc) \
  __PADDLE_DEFINE_EXPORTED_FLAG(name, true, uint64_t, uint64, default_value, \
                                doc)
#define PADDLE_DEFINE_EXPORTED_double(name, default_value, doc) \
  __PADDLE_DEFINE_EXPORTED_FLAG(name, true, double, double, default_value, doc)
#define PADDLE_DEFINE_EXPORTED_string(name, default_value, doc) \
  __PADDLE_DEFINE_EXPORTED_FLAG(name, true, ::std::string, string, \
                                default_value, doc)
} // namespace platform
} // namespace paddle
...@@ -43,9 +43,10 @@ limitations under the License. */ ...@@ -43,9 +43,10 @@ limitations under the License. */
#endif #endif
DECLARE_int32(paddle_num_threads); DECLARE_int32(paddle_num_threads);
DEFINE_int32(multiple_of_cupti_buffer_size, 1, PADDLE_DEFINE_EXPORTED_int32(
"Multiple of the CUPTI device buffer size. If the timestamps have " multiple_of_cupti_buffer_size, 1,
"been dropped when you are profiling, try increasing this value."); "Multiple of the CUPTI device buffer size. If the timestamps have "
"been dropped when you are profiling, try increasing this value.");
namespace paddle { namespace paddle {
namespace platform { namespace platform {
......
...@@ -14,11 +14,12 @@ limitations under the License. */ ...@@ -14,11 +14,12 @@ limitations under the License. */
#include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/place.h"
DEFINE_bool(benchmark, false, PADDLE_DEFINE_EXPORTED_bool(
"Doing memory benchmark. It will make deleting scope synchronized, " benchmark, false,
"and add some memory usage logs." "Doing memory benchmark. It will make deleting scope synchronized, "
"Default cuda is asynchronous device, set to True will" "and add some memory usage logs."
"force op run in synchronous mode."); "Default cuda is asynchronous device, set to True will"
"force op run in synchronous mode.");
namespace paddle { namespace paddle {
namespace platform { namespace platform {
......
...@@ -24,7 +24,8 @@ limitations under the License. */ ...@@ -24,7 +24,8 @@ limitations under the License. */
#include "paddle/fluid/platform/dynload/nvtx.h" #include "paddle/fluid/platform/dynload/nvtx.h"
#endif #endif
DEFINE_bool(enable_rpc_profiler, false, "Enable rpc profiler or not."); PADDLE_DEFINE_EXPORTED_bool(enable_rpc_profiler, false,
"Enable rpc profiler or not.");
namespace paddle { namespace paddle {
namespace platform { namespace platform {
......
...@@ -18,14 +18,15 @@ limitations under the License. */ ...@@ -18,14 +18,15 @@ limitations under the License. */
#include "paddle/fluid/platform/xpu/xpu_header.h" #include "paddle/fluid/platform/xpu/xpu_header.h"
#include "paddle/fluid/string/split.h" #include "paddle/fluid/string/split.h"
DEFINE_string(selected_xpus, "", PADDLE_DEFINE_EXPORTED_string(
"A list of device ids separated by comma, like: 0,1,2,3. " selected_xpus, "",
"This option is useful when doing multi process training and " "A list of device ids separated by comma, like: 0,1,2,3. "
"each process have only one device (XPU). If you want to use " "This option is useful when doing multi process training and "
"all visible devices, set this to empty string. NOTE: the " "each process have only one device (XPU). If you want to use "
"reason of doing this is that we want to use P2P communication" "all visible devices, set this to empty string. NOTE: the "
"between XPU devices, use XPU_VISIBLE_DEVICES can only use" "reason of doing this is that we want to use P2P communication"
"share-memory only."); "between XPU devices, use XPU_VISIBLE_DEVICES can only use"
"share-memory only.");
namespace paddle { namespace paddle {
namespace platform { namespace platform {
......
...@@ -29,82 +29,8 @@ ...@@ -29,82 +29,8 @@
#include "paddle/fluid/platform/macros.h" #include "paddle/fluid/platform/macros.h"
#include "pybind11/stl.h" #include "pybind11/stl.h"
// data processing // NOTE: where are these 2 flags from?
DECLARE_bool(use_mkldnn);
DECLARE_string(tracer_mkldnn_ops_on);
DECLARE_string(tracer_mkldnn_ops_off);
// debug
DECLARE_bool(check_nan_inf);
DECLARE_bool(cpu_deterministic);
DECLARE_bool(enable_rpc_profiler);
DECLARE_int32(multiple_of_cupti_buffer_size);
DECLARE_bool(reader_queue_speed_test_mode);
DECLARE_int32(call_stack_level);
DECLARE_bool(sort_sum_gradient);
DECLARE_bool(check_kernel_launch);
// device management
DECLARE_int32(paddle_num_threads);
// executor
DECLARE_bool(enable_parallel_graph);
DECLARE_string(pe_profile_fname);
DECLARE_string(print_sub_graph_dir);
DECLARE_bool(use_ngraph);
// memory management
DECLARE_string(allocator_strategy);
DECLARE_double(eager_delete_tensor_gb);
DECLARE_double(fraction_of_cpu_memory_to_use);
DECLARE_bool(free_idle_chunk);
DECLARE_bool(free_when_no_cache_hit);
DECLARE_int32(fuse_parameter_groups_size);
DECLARE_double(fuse_parameter_memory_size);
DECLARE_bool(init_allocated_mem);
DECLARE_uint64(initial_cpu_memory_in_mb);
DECLARE_double(memory_fraction_of_eager_deletion);
DECLARE_bool(use_pinned_memory);
DECLARE_bool(use_system_allocator);
// others
DECLARE_bool(benchmark);
DECLARE_int32(inner_op_parallelism);
DECLARE_int32(max_inplace_grad_add);
DECLARE_string(tracer_profile_fname);
DECLARE_bool(apply_pass_to_program);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
// cudnn
DECLARE_uint64(conv_workspace_size_limit);
DECLARE_bool(cudnn_batchnorm_spatial_persistent);
DECLARE_bool(cudnn_deterministic);
DECLARE_bool(cudnn_exhaustive_search);
DECLARE_bool(conv2d_disable_cudnn);
// data processing
DECLARE_bool(enable_cublas_tensor_op_math);
// device management
DECLARE_string(selected_gpus);
// memory management
DECLARE_bool(eager_delete_scope);
DECLARE_bool(fast_eager_deletion_mode);
DECLARE_double(fraction_of_cuda_pinned_memory_to_use);
DECLARE_double(fraction_of_gpu_memory_to_use);
DECLARE_uint64(gpu_memory_limit_mb);
DECLARE_uint64(initial_gpu_memory_in_mb);
DECLARE_uint64(reallocate_gpu_memory_in_mb);
// others
DECLARE_bool(sync_nccl_allreduce);
#endif
#ifdef PADDLE_WITH_XPU
// device management
DECLARE_string(selected_xpus);
#endif
#ifdef PADDLE_WITH_ASCEND_CL
// device management
DECLARE_string(selected_npus);
// set minmum loss scaling value
DECLARE_int32(min_loss_scaling);
#endif
#ifdef PADDLE_WITH_DISTRIBUTE #ifdef PADDLE_WITH_DISTRIBUTE
DECLARE_int32(rpc_send_thread_num);
DECLARE_int32(rpc_get_thread_num); DECLARE_int32(rpc_get_thread_num);
DECLARE_int32(rpc_prefetch_thread_num); DECLARE_int32(rpc_prefetch_thread_num);
#endif #endif
...@@ -181,7 +107,6 @@ class PYBIND11_HIDDEN GlobalVarGetterSetterRegistry { ...@@ -181,7 +107,6 @@ class PYBIND11_HIDDEN GlobalVarGetterSetterRegistry {
PADDLE_ENFORCE_NOT_NULL(setter, PADDLE_ENFORCE_NOT_NULL(setter,
platform::errors::InvalidArgument( platform::errors::InvalidArgument(
"Setter of %s should not be null", name)); "Setter of %s should not be null", name));
var_infos_.insert({name, VarInfo(is_public, getter, setter)}); var_infos_.insert({name, VarInfo(is_public, getter, setter)});
} }
...@@ -243,81 +168,6 @@ class PYBIND11_HIDDEN GlobalVarGetterSetterRegistry { ...@@ -243,81 +168,6 @@ class PYBIND11_HIDDEN GlobalVarGetterSetterRegistry {
GlobalVarGetterSetterRegistry GlobalVarGetterSetterRegistry::instance_; GlobalVarGetterSetterRegistry GlobalVarGetterSetterRegistry::instance_;
class GlobalVarGetterSetterRegistryHelper {
public:
GlobalVarGetterSetterRegistryHelper(bool is_public, bool is_writable,
const std::string &var_names)
: is_public_(is_public),
is_writable_(is_writable),
var_names_(SplitVarNames(var_names)) {}
template <typename... Args>
void Register(Args &&... args) const {
Impl<0, sizeof...(args) == 1, Args...>::Register(
is_public_, is_writable_, var_names_, std::forward<Args>(args)...);
}
private:
static std::vector<std::string> SplitVarNames(const std::string &names) {
auto valid_char = [](char ch) { return !std::isspace(ch) && ch != ','; };
std::vector<std::string> ret;
size_t i = 0, j = 0, n = names.size();
while (i < n) {
for (; i < n && !valid_char(names[i]); ++i) {
}
for (j = i + 1; j < n && valid_char(names[j]); ++j) {
}
if (i < n && j <= n) {
auto substring = names.substr(i, j - i);
VLOG(10) << "Get substring: \"" << substring << "\"";
ret.emplace_back(substring);
}
i = j + 1;
}
return ret;
}
private:
template <size_t kIdx, bool kIsStop, typename T, typename... Args>
struct Impl {
static void Register(bool is_public, bool is_writable,
const std::vector<std::string> &var_names, T &&var,
Args &&... args) {
PADDLE_ENFORCE_EQ(kIdx + 1 + sizeof...(args), var_names.size(),
platform::errors::InvalidArgument(
"Argument number not match name number"));
Impl<kIdx, true, T>::Register(is_public, is_writable, var_names, var);
Impl<kIdx + 1, sizeof...(Args) == 1, Args...>::Register(
is_public, is_writable, var_names, std::forward<Args>(args)...);
}
};
template <size_t kIdx, typename T>
struct Impl<kIdx, true, T> {
static void Register(bool is_public, bool is_writable,
const std::vector<std::string> &var_names, T &&var) {
auto *instance = GlobalVarGetterSetterRegistry::MutableInstance();
if (is_writable) {
instance->Register(
var_names[kIdx], is_public,
GlobalVarGetterSetterRegistry::CreateGetter(std::forward<T>(var)),
GlobalVarGetterSetterRegistry::CreateSetter(&var));
} else {
instance->Register(
var_names[kIdx], is_public,
GlobalVarGetterSetterRegistry::CreateGetter(std::forward<T>(var)));
}
}
};
private:
const bool is_public_;
const bool is_writable_;
const std::vector<std::string> var_names_;
};
static void RegisterGlobalVarGetterSetter(); static void RegisterGlobalVarGetterSetter();
void BindGlobalValueGetterSetter(pybind11::module *module) { void BindGlobalValueGetterSetter(pybind11::module *module) {
...@@ -338,65 +188,51 @@ void BindGlobalValueGetterSetter(pybind11::module *module) { ...@@ -338,65 +188,51 @@ void BindGlobalValueGetterSetter(pybind11::module *module) {
} }
/* Public vars are designed to be writable. */ /* Public vars are designed to be writable. */
#define REGISTER_PUBLIC_GLOBAL_VAR(...) \ #define REGISTER_PUBLIC_GLOBAL_VAR(var) \
do { \ do { \
GlobalVarGetterSetterRegistryHelper(/*is_public=*/true, \ auto *instance = GlobalVarGetterSetterRegistry::MutableInstance(); \
/*is_writable=*/true, "" #__VA_ARGS__) \ instance->Register(#var, /*is_public=*/true, \
.Register(__VA_ARGS__); \ GlobalVarGetterSetterRegistry::CreateGetter(var), \
GlobalVarGetterSetterRegistry::CreateSetter(&var)); \
} while (0) } while (0)
#define REGISTER_PRIVATE_GLOBAL_VAR(is_writable, ...) \ struct RegisterGetterSetterVisitor : public boost::static_visitor<void> {
do { \ RegisterGetterSetterVisitor(const std::string &name, bool is_public,
GlobalVarGetterSetterRegistryHelper(/*is_public=*/false, is_writable, \ void *value_ptr)
"" #__VA_ARGS__) \ : name_(name), value_ptr_(value_ptr) {}
.Register(__VA_ARGS__); \
} while (0)
static void RegisterGlobalVarGetterSetter() { template <typename T>
REGISTER_PRIVATE_GLOBAL_VAR(/*is_writable=*/false, FLAGS_free_idle_chunk, void operator()(const T &) const {
FLAGS_free_when_no_cache_hit); auto &value = *static_cast<T *>(value_ptr_);
auto *instance = GlobalVarGetterSetterRegistry::MutableInstance();
REGISTER_PUBLIC_GLOBAL_VAR( instance->Register(name_, is_public_,
FLAGS_eager_delete_tensor_gb, FLAGS_enable_parallel_graph, GlobalVarGetterSetterRegistry::CreateGetter(value),
FLAGS_allocator_strategy, FLAGS_use_system_allocator, FLAGS_check_nan_inf, GlobalVarGetterSetterRegistry::CreateSetter(&value));
FLAGS_call_stack_level, FLAGS_sort_sum_gradient, FLAGS_cpu_deterministic, }
FLAGS_enable_rpc_profiler, FLAGS_multiple_of_cupti_buffer_size,
FLAGS_reader_queue_speed_test_mode, FLAGS_pe_profile_fname,
FLAGS_print_sub_graph_dir, FLAGS_fraction_of_cpu_memory_to_use,
FLAGS_fuse_parameter_groups_size, FLAGS_fuse_parameter_memory_size,
FLAGS_init_allocated_mem, FLAGS_initial_cpu_memory_in_mb,
FLAGS_memory_fraction_of_eager_deletion, FLAGS_use_pinned_memory,
FLAGS_benchmark, FLAGS_inner_op_parallelism, FLAGS_tracer_profile_fname,
FLAGS_paddle_num_threads, FLAGS_use_mkldnn, FLAGS_max_inplace_grad_add,
FLAGS_tracer_mkldnn_ops_on, FLAGS_tracer_mkldnn_ops_off,
FLAGS_apply_pass_to_program);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
REGISTER_PUBLIC_GLOBAL_VAR(
FLAGS_gpu_memory_limit_mb, FLAGS_cudnn_deterministic,
FLAGS_conv_workspace_size_limit, FLAGS_cudnn_batchnorm_spatial_persistent,
FLAGS_cudnn_exhaustive_search, FLAGS_eager_delete_scope,
FLAGS_fast_eager_deletion_mode,
FLAGS_fraction_of_cuda_pinned_memory_to_use,
FLAGS_fraction_of_gpu_memory_to_use, FLAGS_initial_gpu_memory_in_mb,
FLAGS_reallocate_gpu_memory_in_mb, FLAGS_enable_cublas_tensor_op_math,
FLAGS_selected_gpus, FLAGS_sync_nccl_allreduce,
FLAGS_conv2d_disable_cudnn, FLAGS_check_kernel_launch);
#endif
#ifdef PADDLE_WITH_XPU
REGISTER_PUBLIC_GLOBAL_VAR(FLAGS_selected_xpus);
#endif
#ifdef PADDLE_WITH_ASCEND_CL private:
REGISTER_PUBLIC_GLOBAL_VAR(FLAGS_selected_npus); std::string name_;
REGISTER_PUBLIC_GLOBAL_VAR(FLAGS_min_loss_scaling); bool is_public_;
#endif void *value_ptr_;
};
static void RegisterGlobalVarGetterSetter() {
#ifdef PADDLE_WITH_DITRIBUTE #ifdef PADDLE_WITH_DITRIBUTE
REGISTER_PUBLIC_GLOBAL_VAR(FLAGS_rpc_send_thread_num, REGISTER_PUBLIC_GLOBAL_VAR(FLAGS_rpc_get_thread_num);
FLAGS_rpc_get_thread_num, REGISTER_PUBLIC_GLOBAL_VAR(FLAGS_rpc_prefetch_thread_num);
FLAGS_rpc_prefetch_thread_num);
#endif #endif
const auto &flag_map = platform::GetExportedFlagInfoMap();
for (const auto &pair : flag_map) {
const std::string &name = pair.second.name;
bool is_writable = pair.second.is_writable;
void *value_ptr = const_cast<void *>(pair.second.value_ptr);
const auto &default_value = pair.second.default_value;
RegisterGetterSetterVisitor visitor("FLAGS_" + name, is_writable,
value_ptr);
boost::apply_visitor(visitor, default_value);
}
} }
} // namespace pybind } // namespace pybind
} // namespace paddle } // namespace paddle
...@@ -32,9 +32,10 @@ ...@@ -32,9 +32,10 @@
#include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/place.h"
#include "pybind11/stl.h" #include "pybind11/stl.h"
DEFINE_bool(reader_queue_speed_test_mode, false, PADDLE_DEFINE_EXPORTED_bool(
"If set true, the queue.pop will only get data from queue but not " reader_queue_speed_test_mode, false,
"remove the data from queue for speed testing"); "If set true, the queue.pop will only get data from queue but not "
"remove the data from queue for speed testing");
namespace paddle { namespace paddle {
namespace pybind { namespace pybind {
......
...@@ -176,83 +176,23 @@ def __bootstrap__(): ...@@ -176,83 +176,23 @@ def __bootstrap__():
print('PLEASE USE OMP_NUM_THREADS WISELY.', file=sys.stderr) print('PLEASE USE OMP_NUM_THREADS WISELY.', file=sys.stderr)
os.environ['OMP_NUM_THREADS'] = str(num_threads) os.environ['OMP_NUM_THREADS'] = str(num_threads)
sysstr = platform.system()
flag_prefix = 'FLAGS_'
read_env_flags = [ read_env_flags = [
'check_nan_inf', key[len(flag_prefix):] for key in core.globals().keys()
'convert_all_blocks', if key.startswith(flag_prefix)
'benchmark',
'eager_delete_scope',
'fraction_of_cpu_memory_to_use',
'initial_cpu_memory_in_mb',
'init_allocated_mem',
'paddle_num_threads',
'dist_threadpool_size',
'eager_delete_tensor_gb',
'fast_eager_deletion_mode',
'memory_fraction_of_eager_deletion',
'allocator_strategy',
'reader_queue_speed_test_mode',
'print_sub_graph_dir',
'pe_profile_fname',
'inner_op_parallelism',
'enable_parallel_graph',
'fuse_parameter_groups_size',
'multiple_of_cupti_buffer_size',
'fuse_parameter_memory_size',
'tracer_profile_fname',
'dygraph_debug',
'use_system_allocator',
'enable_unused_var_check',
'free_idle_chunk',
'free_when_no_cache_hit',
'call_stack_level',
'sort_sum_gradient',
'max_inplace_grad_add',
'apply_pass_to_program',
'new_executor_use_inplace',
] ]
if 'Darwin' not in sysstr:
read_env_flags.append('use_pinned_memory')
if os.name != 'nt': def remove_flag_if_exists(name):
read_env_flags.append('cpu_deterministic') if name in read_env_flags:
read_env_flags.remove(name)
if core.is_compiled_with_mkldnn(): sysstr = platform.system()
read_env_flags.append('use_mkldnn') if 'Darwin' in sysstr:
read_env_flags.append('tracer_mkldnn_ops_on') remove_flags_if_exists('use_pinned_memory')
read_env_flags.append('tracer_mkldnn_ops_off')
if core.is_compiled_with_cuda():
read_env_flags += [
'fraction_of_gpu_memory_to_use',
'initial_gpu_memory_in_mb',
'reallocate_gpu_memory_in_mb',
'cudnn_deterministic',
'enable_cublas_tensor_op_math',
'conv_workspace_size_limit',
'cudnn_exhaustive_search',
'selected_gpus',
'sync_nccl_allreduce',
'cudnn_batchnorm_spatial_persistent',
'gpu_allocator_retry_time',
'local_exe_sub_scope_limit',
'gpu_memory_limit_mb',
'conv2d_disable_cudnn',
'get_host_by_name_time',
]
if core.is_compiled_with_npu(): if os.name == 'nt':
read_env_flags += [ remove_flags_if_exists('cpu_deterministic')
'selected_npus',
'fraction_of_gpu_memory_to_use',
'initial_gpu_memory_in_mb',
'reallocate_gpu_memory_in_mb',
'gpu_memory_limit_mb',
'npu_config_path',
'get_host_by_name_time',
'hccl_check_nan',
'min_loss_scaling',
]
core.init_gflags(["--tryfromenv=" + ",".join(read_env_flags)]) core.init_gflags(["--tryfromenv=" + ",".join(read_env_flags)])
# Note(zhouwei25): sys may not have argv in some cases, # Note(zhouwei25): sys may not have argv in some cases,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册