Crayon鑫 / Paddle (forked from PaddlePaddle / Paddle)

Commit ca0136a6
Authored September 16, 2021 by sneaxiy

make flag setter easier

Parent: e93c18a3
Showing 28 changed files with 375 additions and 462 deletions (+375, -462)
paddle/fluid/framework/details/reduce_op_handle.cc (+1, -1)
paddle/fluid/framework/ir/coalesce_grad_tensor_pass.cc (+8, -7)
paddle/fluid/framework/ir/graph.cc (+2, -2)
paddle/fluid/framework/ir/graph_helper.cc (+3, -3)
paddle/fluid/framework/operator.cc (+2, -1)
paddle/fluid/framework/parallel_executor.cc (+7, -5)
paddle/fluid/framework/scope.cc (+1, -1)
paddle/fluid/framework/unused_var_check.cc (+5, -4)
paddle/fluid/imperative/CMakeLists.txt (+1, -1)
paddle/fluid/imperative/flags.cc (+4, -4)
paddle/fluid/imperative/profiler.cc (+2, -2)
paddle/fluid/memory/allocation/CMakeLists.txt (+1, -1)
paddle/fluid/memory/allocation/allocator_facade.cc (+5, -4)
paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc (+15, -12)
paddle/fluid/memory/allocation/naive_best_fit_allocator.cc (+7, -6)
paddle/fluid/operators/pscore/heter_listen_and_serv_op.cc (+2, -1)
paddle/fluid/platform/CMakeLists.txt (+2, -2)
paddle/fluid/platform/cpu_info.cc (+6, -3)
paddle/fluid/platform/enforce.h (+1, -0)
paddle/fluid/platform/flags.cc (+139, -107)
paddle/fluid/platform/flags.h (+85, -0)
paddle/fluid/platform/init.cc (+4, -3)
paddle/fluid/platform/place.cc (+6, -5)
paddle/fluid/platform/profiler.cc (+2, -1)
paddle/fluid/platform/xpu/xpu_info.cc (+9, -8)
paddle/fluid/pybind/global_value_getter_setter.cc (+39, -203)
paddle/fluid/pybind/reader_py.cc (+4, -3)
python/paddle/fluid/__init__.py (+12, -72)
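The change is mechanical across the whole patch: every flag definition moves from the raw gflags DEFINE_* macros to the PADDLE_DEFINE_EXPORTED_* wrappers introduced in paddle/fluid/platform/flags.h, which define the same gflag and additionally self-register it in a process-wide exported-flag map (see the flags.h diff below). A minimal sketch of the pattern, using the first file's flag:

// Before: a plain gflags definition. To expose it to Python, the pybind
// layer needed a hand-written DECLARE_bool(cpu_deterministic) plus manual
// getter/setter registration.
DEFINE_bool(cpu_deterministic, false,
            "Whether to make the result of computation deterministic in CPU side.");

// After: the same gflag, plus a static registrar that records the flag in
// paddle::platform::GetExportedFlagInfoMap(), so the setter layer can
// discover it automatically.
PADDLE_DEFINE_EXPORTED_bool(
    cpu_deterministic, false,
    "Whether to make the result of computation deterministic in CPU side.");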
paddle/fluid/framework/details/reduce_op_handle.cc

@@ -19,7 +19,7 @@
 #include "paddle/fluid/framework/details/variable_visitor.h"
 #include "paddle/fluid/platform/profiler.h"
-DEFINE_bool(
+PADDLE_DEFINE_EXPORTED_bool(
     cpu_deterministic, false,
     "Whether to make the result of computation deterministic in CPU side.");
paddle/fluid/framework/ir/coalesce_grad_tensor_pass.cc

@@ -25,13 +25,14 @@ class VarDesc;
 }  // namespace framework
 }  // namespace paddle
 
-DEFINE_double(fuse_parameter_memory_size, -1.0,  // MBytes
-              "fuse_parameter_memory_size is up limited memory size(MB)"
-              "of one group parameters' gradient which is the input "
-              "of communication calling(e.g NCCLAllReduce). "
-              "The default value is 0, it means that "
-              "not set group according to memory_size.");
-DEFINE_int32(fuse_parameter_groups_size, 1,
+PADDLE_DEFINE_EXPORTED_double(
+    fuse_parameter_memory_size, -1.0,  // MBytes
+    "fuse_parameter_memory_size is up limited memory size(MB)"
+    "of one group parameters' gradient which is the input "
+    "of communication calling(e.g NCCLAllReduce). "
+    "The default value is 0, it means that "
+    "not set group according to memory_size.");
+PADDLE_DEFINE_EXPORTED_int32(
+    fuse_parameter_groups_size, 1,
     "fuse_parameter_groups_size is the up limited size of one group "
     "parameters' gradient. "
paddle/fluid/framework/ir/graph.cc

@@ -17,8 +17,8 @@ limitations under the License. */
 #include "paddle/fluid/framework/ir/graph.h"
 #include "paddle/fluid/framework/operator.h"
 
-DEFINE_bool(convert_all_blocks, true,
-            "Convert all blocks in program into SSAgraphs");
+PADDLE_DEFINE_EXPORTED_bool(convert_all_blocks, true,
+                            "Convert all blocks in program into SSAgraphs");
 
 namespace paddle {
 namespace framework {
paddle/fluid/framework/ir/graph_helper.cc

@@ -18,9 +18,9 @@ limitations under the License. */
 #include "paddle/fluid/framework/op_proto_maker.h"
 
 DECLARE_bool(convert_all_blocks);
-DEFINE_string(print_sub_graph_dir, "",
-              "FLAGS_print_sub_graph_dir is used "
-              "to print the nodes of sub_graphs.");
+PADDLE_DEFINE_EXPORTED_string(print_sub_graph_dir, "",
+                              "FLAGS_print_sub_graph_dir is used "
+                              "to print the nodes of sub_graphs.");
 
 namespace paddle {
 namespace framework {
paddle/fluid/framework/operator.cc

@@ -47,7 +47,8 @@ class LoDTensor;
 DECLARE_bool(benchmark);
 DECLARE_bool(check_nan_inf);
 DECLARE_bool(enable_unused_var_check);
-DEFINE_int32(inner_op_parallelism, 0, "number of threads for inner op");
+PADDLE_DEFINE_EXPORTED_int32(inner_op_parallelism, 0,
+                             "number of threads for inner op");
 
 namespace paddle {
 namespace framework {
paddle/fluid/framework/parallel_executor.cc

@@ -46,11 +46,13 @@ DECLARE_double(eager_delete_tensor_gb);
 #ifdef WITH_GPERFTOOLS
 #include "gperftools/profiler.h"
 #endif
-DEFINE_string(pe_profile_fname, "",
-              "Profiler filename for PE, which generated by gperftools."
-              "Only valid when compiled `WITH_PRIFILER=ON`. Empty if disable.");
-DEFINE_bool(enable_parallel_graph, false,
-            "Force disable parallel graph execution mode if set false.");
+PADDLE_DEFINE_EXPORTED_string(
+    pe_profile_fname, "",
+    "Profiler filename for PE, which generated by gperftools."
+    "Only valid when compiled `WITH_PRIFILER=ON`. Empty if disable.");
+PADDLE_DEFINE_EXPORTED_bool(
+    enable_parallel_graph, false,
+    "Force disable parallel graph execution mode if set false.");
 
 namespace paddle {
 namespace framework {
paddle/fluid/framework/scope.cc

@@ -19,7 +19,7 @@ limitations under the License. */
 DECLARE_bool(benchmark);
 
-DEFINE_bool(
+PADDLE_DEFINE_EXPORTED_bool(
     eager_delete_scope, true,
     "Delete local scope eagerly. It will reduce GPU memory usage but "
     "slow down the destruction of variables.(around 1% performance harm)");
paddle/fluid/framework/unused_var_check.cc

@@ -17,15 +17,16 @@ limitations under the License. */
 #include <glog/logging.h>
 #include <string>
 
-#include "gflags/gflags.h"
 #include "paddle/fluid/framework/no_need_buffer_vars_inference.h"
 #include "paddle/fluid/framework/op_info.h"
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/platform/enforce.h"
+#include "paddle/fluid/platform/flags.h"
 
-DEFINE_bool(enable_unused_var_check, false,
-            "Checking whether operator contains unused inputs, "
-            "especially for grad operator. It should be in unittest.");
+PADDLE_DEFINE_EXPORTED_bool(
+    enable_unused_var_check, false,
+    "Checking whether operator contains unused inputs, "
+    "especially for grad operator. It should be in unittest.");
 
 namespace paddle {
 namespace framework {
paddle/fluid/imperative/CMakeLists.txt

@@ -11,7 +11,7 @@ cc_library(amp SRCS amp_auto_cast.cc DEPS layer )
 cc_library(tracer SRCS tracer.cc DEPS layer engine program_desc_tracer amp denormal)
 cc_library(basic_engine SRCS basic_engine.cc DEPS layer gradient_accumulator)
 cc_library(engine SRCS basic_engine.cc partial_grad_engine.cc DEPS layer gradient_accumulator)
-cc_library(imperative_profiler SRCS profiler.cc)
+cc_library(imperative_profiler SRCS profiler.cc DEPS flags)
 if(NOT WIN32)
     if(WITH_NCCL OR WITH_RCCL)
         cc_library(imperative_all_reduce SRCS all_reduce.cc DEPS collective_helper device_context selected_rows tensor)
paddle/fluid/imperative/flags.cc

@@ -13,11 +13,11 @@
 // limitations under the License.
 
 #include "paddle/fluid/imperative/flags.h"
-#include "gflags/gflags.h"
+#include "paddle/fluid/platform/flags.h"
 
-DEFINE_uint64(dygraph_debug, 0,
-              "Debug level of dygraph. This flag is not "
-              "open to users");
+PADDLE_DEFINE_EXPORTED_uint64(dygraph_debug, 0,
+                              "Debug level of dygraph. This flag is not "
+                              "open to users");
 
 namespace paddle {
 namespace imperative {
paddle/fluid/imperative/profiler.cc

@@ -19,9 +19,9 @@
 #endif
 #include <glog/logging.h>
 #include <mutex>  // NOLINT
-#include "gflags/gflags.h"
+#include "paddle/fluid/platform/flags.h"
 
-DEFINE_string(
+PADDLE_DEFINE_EXPORTED_string(
     tracer_profile_fname, "xxgperf",
     "Profiler filename for imperative tracer, which generated by gperftools."
     "Only valid when compiled `WITH_PROFILER=ON`. Empty if disable.");
paddle/fluid/memory/allocation/CMakeLists.txt

@@ -99,7 +99,7 @@ cc_test(allocator_facade_abs_flags_test SRCS allocator_facade_abs_flags_test.cc
 cc_test(allocator_facade_frac_flags_test SRCS allocator_facade_frac_flags_test.cc DEPS allocator_facade)
 
-cc_library(auto_growth_best_fit_allocator SRCS auto_growth_best_fit_allocator.cc DEPS allocator aligned_allocator)
+cc_library(auto_growth_best_fit_allocator SRCS auto_growth_best_fit_allocator.cc DEPS allocator aligned_allocator flags)
 cc_test(auto_growth_best_fit_allocator_facade_test SRCS auto_growth_best_fit_allocator_facade_test.cc DEPS cpu_allocator auto_growth_best_fit_allocator)
 cc_test(auto_growth_best_fit_allocator_test SRCS auto_growth_best_fit_allocator_test.cc DEPS auto_growth_best_fit_allocator)
paddle/fluid/memory/allocation/allocator_facade.cc

@@ -37,14 +37,15 @@
 #endif
 #include "paddle/fluid/platform/npu_info.h"
 
-DEFINE_int64(
+PADDLE_DEFINE_EXPORTED_int64(
     gpu_allocator_retry_time, 10000,
     "The retry time (milliseconds) when allocator fails "
     "to allocate memory. No retry if this value is not greater than 0");
 
-DEFINE_bool(use_system_allocator, false,
-            "Whether to use system allocator to allocate CPU and GPU memory. "
-            "Only used for unittests.");
+PADDLE_DEFINE_EXPORTED_bool(
+    use_system_allocator, false,
+    "Whether to use system allocator to allocate CPU and GPU memory. "
+    "Only used for unittests.");
 
 namespace paddle {
 namespace memory {
paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc

@@ -17,18 +17,21 @@
 #include <algorithm>
 #include <mutex>  // NOLINT
 #include "paddle/fluid/memory/allocation/aligned_allocator.h"
+#include "paddle/fluid/platform/flags.h"
 
-DEFINE_bool(free_idle_chunk, false,
-            "Whether to free idle chunk when each allocation is freed. "
-            "If false, all freed allocation would be cached to speed up next "
-            "allocation request. If true, no allocation would be cached. This "
-            "flag only works when FLAGS_allocator_strategy=auto_growth.");
+PADDLE_DEFINE_READONLY_EXPORTED_bool(
+    free_idle_chunk, false,
+    "Whether to free idle chunk when each allocation is freed. "
+    "If false, all freed allocation would be cached to speed up next "
+    "allocation request. If true, no allocation would be cached. This "
+    "flag only works when FLAGS_allocator_strategy=auto_growth.");
 
-DEFINE_bool(free_when_no_cache_hit, false,
-            "Whether to free idle chunks when no cache hit. If true, idle "
-            "chunk would be freed when no cache hit; if false, idle "
-            "chunk would be freed when out of memory occurs. This flag "
-            "only works when FLAGS_allocator_strategy=auto_growth.");
+PADDLE_DEFINE_READONLY_EXPORTED_bool(
+    free_when_no_cache_hit, false,
+    "Whether to free idle chunks when no cache hit. If true, idle "
+    "chunk would be freed when no cache hit; if false, idle "
+    "chunk would be freed when out of memory occurs. This flag "
+    "only works when FLAGS_allocator_strategy=auto_growth.");
 
 namespace paddle {
 namespace memory {
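Note the PADDLE_DEFINE_READONLY_EXPORTED_bool variant used here: per the new flags.h, it registers the flag with is_writable = false, so these two allocator flags appear in the exported-flag map but are marked non-writable. A hedged sketch of how a setter could honor that bit; SetExportedBoolFlag is a hypothetical illustration, not code from this commit:

#include <stdexcept>
#include <string>
#include "paddle/fluid/platform/flags.h"

// Hypothetical setter that refuses to mutate read-only exported flags.
void SetExportedBoolFlag(const std::string &name, bool value) {
  const auto &map = paddle::platform::GetExportedFlagInfoMap();
  const auto &info = map.at(name);  // throws std::out_of_range if unknown
  if (!info.is_writable) {
    throw std::runtime_error("FLAGS_" + name + " is read-only");
  }
  *static_cast<bool *>(info.value_ptr) = value;
}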
paddle/fluid/memory/allocation/naive_best_fit_allocator.cc

@@ -34,12 +34,13 @@
 #include "paddle/fluid/platform/xpu/xpu_header.h"
 #endif
 
-DEFINE_bool(init_allocated_mem, false,
-            "It is a mistake that the values of the memory allocated by "
-            "BuddyAllocator are always zeroed in some op's implementation. "
-            "To find this error in time, we use init_allocated_mem to indicate "
-            "that initializing the allocated memory with a small value "
-            "during unit testing.");
+PADDLE_DEFINE_EXPORTED_bool(
+    init_allocated_mem, false,
+    "It is a mistake that the values of the memory allocated by "
+    "BuddyAllocator are always zeroed in some op's implementation. "
+    "To find this error in time, we use init_allocated_mem to indicate "
+    "that initializing the allocated memory with a small value "
+    "during unit testing.");
 DECLARE_double(fraction_of_gpu_memory_to_use);
 DECLARE_uint64(initial_gpu_memory_in_mb);
 DECLARE_uint64(reallocate_gpu_memory_in_mb);
paddle/fluid/operators/pscore/heter_listen_and_serv_op.cc

@@ -15,7 +15,8 @@ limitations under the License. */
 #include "paddle/fluid/operators/pscore/heter_listen_and_serv_op.h"
 #include "paddle/fluid/framework/op_registry.h"
 
-DEFINE_int32(rpc_send_thread_num, 12, "number of threads for rpc send");
+PADDLE_DEFINE_EXPORTED_int32(rpc_send_thread_num, 12,
+                             "number of threads for rpc send");
 
 namespace paddle {
 namespace operators {
paddle/fluid/platform/CMakeLists.txt

@@ -37,13 +37,13 @@ if (WITH_PYTHON)
   endif(NOT WIN32)
 endif()
 
-cc_library(flags SRCS flags.cc DEPS gflags)
+cc_library(flags SRCS flags.cc DEPS gflags boost)
 cc_library(denormal SRCS denormal.cc DEPS)
 cc_library(errors SRCS errors.cc DEPS error_codes_proto)
 cc_test(errors_test SRCS errors_test.cc DEPS errors enforce)
 
-set(enforce_deps flags errors boost)
+set(enforce_deps flags errors boost flags)
 if(WITH_GPU)
   set(enforce_deps ${enforce_deps} external_error_proto)
 endif()
paddle/fluid/platform/cpu_info.cc

@@ -31,7 +31,7 @@ limitations under the License. */
 #endif  // _WIN32
 
 #include <algorithm>
-#include "gflags/gflags.h"
+#include "paddle/fluid/platform/flags.h"
 
 DECLARE_double(fraction_of_cpu_memory_to_use);
 DECLARE_uint64(initial_cpu_memory_in_mb);

@@ -42,7 +42,8 @@ DECLARE_double(fraction_of_cuda_pinned_memory_to_use);
 // between host and device. Allocates too much would reduce the amount
 // of memory available to the system for paging. So, by default, we
 // should set false to use_pinned_memory.
-DEFINE_bool(use_pinned_memory, true, "If set, allocate cpu pinned memory.");
+PADDLE_DEFINE_EXPORTED_bool(use_pinned_memory, true,
+                            "If set, allocate cpu pinned memory.");
 
 namespace paddle {
 namespace platform {

@@ -54,7 +55,9 @@ size_t CpuTotalPhysicalMemory() {
   mib[1] = HW_MEMSIZE;
   int64_t size = 0;
   size_t len = sizeof(size);
-  if (sysctl(mib, 2, &size, &len, NULL, 0) == 0) return (size_t)size;
+  if (sysctl(mib, 2, &size, &len, NULL, 0) == 0) {
+    return static_cast<size_t>(size);
+  }
   return 0L;
 #elif defined(_WIN32)
   MEMORYSTATUSEX sMeminfo;
paddle/fluid/platform/enforce.h

@@ -101,6 +101,7 @@ limitations under the License. */
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
 #include "paddle/fluid/platform/type_defs.h"
 #endif
+#include "paddle/fluid/platform/flags.h"
 
 namespace paddle {
 namespace platform {
paddle/fluid/platform/flags.cc

@@ -12,11 +12,22 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "gflags/gflags.h"
+#include "paddle/fluid/platform/flags.h"
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
 #include "paddle/fluid/platform/cudnn_workspace_helper.h"
 #endif
 
+namespace paddle {
+namespace platform {
+
+const ExportedFlagInfoMap &GetExportedFlagInfoMap() {
+  static ExportedFlagInfoMap g_exported_flag_info_map;
+  return g_exported_flag_info_map;
+}
+
+}  // namespace platform
+}  // namespace paddle
+
 /**
  * NOTE(paddle-dev): This file is designed to define all public FLAGS.
  */
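GetExportedFlagInfoMap() returns a function-local static map (a Meyers singleton), so the static registrars generated by each PADDLE_DEFINE_EXPORTED_* macro can insert themselves safely regardless of translation-unit initialization order. A hedged sketch of how a consumer might enumerate the registry; DumpExportedFlags is a hypothetical helper, not part of this commit:

#include <iostream>
#include "paddle/fluid/platform/flags.h"

// Hypothetical helper: list every exported flag with its docstring.
void DumpExportedFlags() {
  for (const auto &pair : paddle::platform::GetExportedFlagInfoMap()) {
    const paddle::platform::FlagInfo &info = pair.second;
    std::cout << info.name << (info.is_writable ? "" : " (read-only)")
              << ": " << info.doc << "\n";
  }
}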
@@ -30,8 +41,8 @@
  * instance to 2
  * Note:
  */
-DEFINE_int32(paddle_num_threads, 1,
-             "Number of threads for each paddle instance.");
+PADDLE_DEFINE_EXPORTED_int32(paddle_num_threads, 1,
+                             "Number of threads for each paddle instance.");

@@ -41,9 +52,10 @@
  * Example:
  * Note: Used to debug. Checking whether operator produce NAN/INF or not.
  */
-DEFINE_bool(check_nan_inf, false,
-            "Checking whether operator produce NAN/INF or not. It will be "
-            "extremely slow so please use this flag wisely.");
+PADDLE_DEFINE_EXPORTED_bool(
+    check_nan_inf, false,
+    "Checking whether operator produce NAN/INF or not. It will be "
+    "extremely slow so please use this flag wisely.");
 
 // NOTE(zhiqiu): better to share the flags, otherwise we will have too many
 // flags.

@@ -58,7 +70,7 @@
  * Example:
  * Note: whether to use Tensor Core, faster but it may loss precision.
  */
-DEFINE_bool(
+PADDLE_DEFINE_EXPORTED_bool(
     enable_cublas_tensor_op_math, false,
     "The enable_cublas_tensor_op_math indicate whether to use Tensor Core, "
     "but it may loss precision. Currently, There are two CUDA libraries that"

@@ -77,30 +89,34 @@
  * cards
  * Note: A list of device ids separated by comma, like: 0,1,2,3
  */
-DEFINE_string(selected_gpus, "",
-              "A list of device ids separated by comma, like: 0,1,2,3. "
-              "This option is useful when doing multi process training and "
-              "each process have only one device (GPU). If you want to use "
-              "all visible devices, set this to empty string. NOTE: the "
-              "reason of doing this is that we want to use P2P communication"
-              "between GPU devices, use CUDA_VISIBLE_DEVICES can only use"
-              "share-memory only.");
+PADDLE_DEFINE_EXPORTED_string(
+    selected_gpus, "",
+    "A list of device ids separated by comma, like: 0,1,2,3. "
+    "This option is useful when doing multi process training and "
+    "each process have only one device (GPU). If you want to use "
+    "all visible devices, set this to empty string. NOTE: the "
+    "reason of doing this is that we want to use P2P communication"
+    "between GPU devices, use CUDA_VISIBLE_DEVICES can only use"
+    "share-memory only.");
 #endif
 
 #if defined(PADDLE_WITH_ASCEND_CL)
-DEFINE_string(selected_npus, "",
-              "A list of device ids separated by comma, like: 0,1,2,3. "
-              "This option is useful when doing multi process training and "
-              "each process have only one device (NPU). If you want to use "
-              "all visible devices, set this to empty string.");
-DEFINE_bool(hccl_check_nan, true,
-            "Check Nan in tensor before hccl_allreduce_sum otherwise it'll "
-            "core when meets Nan value");
-DEFINE_string(npu_config_path, "",
-              "The absolute path of configuration json file, like: /tmp/config.json. "
-              "If proveided, it will be passed to aclInit().");
-DEFINE_int32(min_loss_scaling, 1, "set minmum loss scaling value!");
+PADDLE_DEFINE_EXPORTED_string(
+    selected_npus, "",
+    "A list of device ids separated by comma, like: 0,1,2,3. "
+    "This option is useful when doing multi process training and "
+    "each process have only one device (NPU). If you want to use "
+    "all visible devices, set this to empty string.");
+PADDLE_DEFINE_EXPORTED_bool(
+    hccl_check_nan, true,
+    "Check Nan in tensor before hccl_allreduce_sum otherwise it'll "
+    "core when meets Nan value");
+PADDLE_DEFINE_EXPORTED_string(
+    npu_config_path, "",
+    "The absolute path of configuration json file, like: /tmp/config.json. "
+    "If proveided, it will be passed to aclInit().");
+PADDLE_DEFINE_EXPORTED_int32(min_loss_scaling, 1,
+                             "set minmum loss scaling value!");
 #endif
 
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)

@@ -113,10 +129,11 @@ DEFINE_int32(min_loss_scaling, 1, "set minmum loss scaling value!");
  * Note: whether to use deterministic algorithm in cudnn.
  * If true, it will slow down some operators such as conv and pooling.
  */
-DEFINE_bool(cudnn_deterministic, false,
-            "Whether allow using an autotuning algorithm for convolution "
-            "operator. The autotuning algorithm may be non-deterministic. If "
-            "true, the algorithm is deterministic.");
+PADDLE_DEFINE_EXPORTED_bool(
+    cudnn_deterministic, false,
+    "Whether allow using an autotuning algorithm for convolution "
+    "operator. The autotuning algorithm may be non-deterministic. If "
+    "true, the algorithm is deterministic.");

@@ -130,9 +147,10 @@ DEFINE_bool(cudnn_deterministic, false,
  * increased.
  * Users need to balance memory and speed.
  */
-DEFINE_uint64(conv_workspace_size_limit,
-              paddle::platform::kDefaultConvWorkspaceSizeLimitMB,
-              "cuDNN convolution workspace limit in MB unit.");
+PADDLE_DEFINE_EXPORTED_uint64(
+    conv_workspace_size_limit,
+    paddle::platform::kDefaultConvWorkspaceSizeLimitMB,
+    "cuDNN convolution workspace limit in MB unit.");

@@ -148,9 +166,10 @@ DEFINE_uint64(conv_workspace_size_limit,
  * layer specification. Once you change the layer specifications
  * (such as batch size, feature map size), it will search again.
  */
-DEFINE_bool(cudnn_exhaustive_search, false,
-            "Whether enable exhaustive search for cuDNN convolution or "
-            "not, default is False.");
+PADDLE_DEFINE_EXPORTED_bool(
+    cudnn_exhaustive_search, false,
+    "Whether enable exhaustive search for cuDNN convolution or "
+    "not, default is False.");

@@ -160,9 +179,9 @@ DEFINE_bool(cudnn_exhaustive_search, false,
  * Example:
  * Note: only used to predict for advanced developer
  */
-DEFINE_int64(cudnn_exhaustive_search_times, -1,
-             "Exhaustive search times for cuDNN convolution, "
-             "default is -1, not exhaustive search");
+PADDLE_DEFINE_EXPORTED_int64(cudnn_exhaustive_search_times, -1,
+                             "Exhaustive search times for cuDNN convolution, "
+                             "default is -1, not exhaustive search");

@@ -180,9 +199,10 @@ DEFINE_int64(cudnn_exhaustive_search_times, -1,
  * certain
  * input data range.
  */
-DEFINE_bool(cudnn_batchnorm_spatial_persistent, false,
-            "Whether enable CUDNN_BATCHNORM_SPATIAL_PERSISTENT mode for cudnn "
-            "batch_norm, default is False.");
+PADDLE_DEFINE_EXPORTED_bool(
+    cudnn_batchnorm_spatial_persistent, false,
+    "Whether enable CUDNN_BATCHNORM_SPATIAL_PERSISTENT mode for cudnn "
+    "batch_norm, default is False.");
 #endif
 
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)

@@ -197,7 +217,7 @@ DEFINE_bool(cudnn_batchnorm_spatial_persistent, false,
  * https://github.com/PaddlePaddle/Paddle/issues/15049
  * If you want to change this default value, why?(gongwb)
  */
-DEFINE_bool(
+PADDLE_DEFINE_EXPORTED_bool(
     sync_nccl_allreduce, true,
     "If set true, will call `cudaStreamSynchronize(nccl_stream)`"
    "after allreduce, this mode can get better performance in some scenarios.");

@@ -215,11 +235,12 @@ DEFINE_bool(
  * into the queue, and then the communicator takes the gradients out
  * of the queue and sends them after merging.
  */
-DEFINE_int32(communicator_max_merge_var_num, 20,
-             "max var num to merge and send");
-DEFINE_bool(communicator_is_sgd_optimizer, true,
-            "gradient sent to the server is the sum of the gradients "
-            "calculated by each thread if optimizer is sgd");
+PADDLE_DEFINE_EXPORTED_int32(communicator_max_merge_var_num, 20,
+                             "max var num to merge and send");
+PADDLE_DEFINE_EXPORTED_bool(
+    communicator_is_sgd_optimizer, true,
+    "gradient sent to the server is the sum of the gradients "
+    "calculated by each thread if optimizer is sgd");
 
 /**
  * Distributed related FLAG
  * Name: FLAGS_communicator_send_queue_size

@@ -233,8 +254,8 @@ DEFINE_bool(communicator_is_sgd_optimizer, true,
  * space. It is used to avoid training much faster than communication,
  * so that too many gradients are not sent out in time.
  */
-DEFINE_int32(communicator_send_queue_size, 20,
-             "queue size to recv gradient before send");
+PADDLE_DEFINE_EXPORTED_int32(communicator_send_queue_size, 20,
+                             "queue size to recv gradient before send");
 #endif

@@ -246,8 +267,9 @@ DEFINE_int32(communicator_send_queue_size, 20,
  * Note: Control the number of threads used for distributed modules.
  * If it is not set, it is set to a hard thread.
  */
-DEFINE_int32(dist_threadpool_size, 0,
-             "number of threads used for distributed executed.");
+PADDLE_DEFINE_EXPORTED_int32(
+    dist_threadpool_size, 0,
+    "number of threads used for distributed executed.");
 
 /**
  * Garbage collector related FLAG

@@ -272,7 +294,7 @@ static const double kDefaultEagerDeleteTensorGB = -1;
 static const double kDefaultEagerDeleteTensorGB = 0;
 #endif
 
-DEFINE_double(
+PADDLE_DEFINE_EXPORTED_double(
     eager_delete_tensor_gb, kDefaultEagerDeleteTensorGB,
     "Memory size threshold (GB) when the garbage collector clear tensors."
     "Disabled when this value is less than 0");

@@ -289,9 +311,10 @@ DEFINE_double(
  * has finished, which will make the garbage collection strategy faster.
  * Only works when garbage collection strategy is enabled.
  */
-DEFINE_bool(fast_eager_deletion_mode, true,
-            "Fast eager deletion mode. If enabled, memory would release "
-            "immediately without waiting GPU kernel ends.");
+PADDLE_DEFINE_EXPORTED_bool(
+    fast_eager_deletion_mode, true,
+    "Fast eager deletion mode. If enabled, memory would release "
+    "immediately without waiting GPU kernel ends.");
 
 /**
  * Memory related FLAG

@@ -311,11 +334,12 @@ DEFINE_bool(fast_eager_deletion_mode, true,
  * largest FLAGS_memory_fraction_of_eager_deletion ratio will be released.
  * The flag is only valid when running parallel data compilers.
  */
-DEFINE_double(memory_fraction_of_eager_deletion, 1.0,
-              "Fraction of eager deletion. If less than 1.0, all variables in "
-              "the program would be sorted according to its memory size, and "
-              "only the FLAGS_memory_fraction_of_eager_deletion of the largest "
-              "variables would be deleted.");
+PADDLE_DEFINE_EXPORTED_double(
+    memory_fraction_of_eager_deletion, 1.0,
+    "Fraction of eager deletion. If less than 1.0, all variables in "
+    "the program would be sorted according to its memory size, and "
+    "only the FLAGS_memory_fraction_of_eager_deletion of the largest "
+    "variables would be deleted.");
 
 /**
  * Allocator related FLAG

@@ -331,7 +355,7 @@ static constexpr char kDefaultAllocatorStrategy[] = "naive_best_fit";
 #else
 static constexpr char kDefaultAllocatorStrategy[] = "auto_growth";
 #endif
-DEFINE_string(
+PADDLE_DEFINE_EXPORTED_string(
     allocator_strategy, kDefaultAllocatorStrategy,
     "The allocation strategy, enum in [naive_best_fit, auto_growth]. "
     "naive_best_fit means the original pre-allocated allocator of Paddle. "

@@ -358,9 +382,9 @@ DEFINE_string(
  * size as the memory block will be allocated from the CUDA pinned
  * request util the CPU does not have enough memory.
  */
-DEFINE_double(fraction_of_cpu_memory_to_use, 1,
-              "Default use 100% of CPU memory for PaddlePaddle,"
-              "reserve the rest for page tables, etc");
+PADDLE_DEFINE_EXPORTED_double(fraction_of_cpu_memory_to_use, 1,
+                              "Default use 100% of CPU memory for PaddlePaddle,"
+                              "reserve the rest for page tables, etc");
 
 /**
  * Memory related FLAG

@@ -374,8 +398,9 @@ DEFINE_double(fraction_of_cpu_memory_to_use, 1,
  * FLAGS_fraction_of_cpu_memory_to_use*(total physical memory)
  * as memory block sizes.
  */
-DEFINE_uint64(initial_cpu_memory_in_mb, 500ul,
-              "Initial CPU memory for PaddlePaddle, in MD unit.");
+PADDLE_DEFINE_EXPORTED_uint64(
+    initial_cpu_memory_in_mb, 500ul,
+    "Initial CPU memory for PaddlePaddle, in MD unit.");
 
 /**
  * Memory related FLAG

@@ -390,7 +415,7 @@ DEFINE_uint64(initial_cpu_memory_in_mb, 500ul,
  * size as the memory block will be allocated from the CPU
  * request util the CPU does not have enough memory.
  */
-DEFINE_double(
+PADDLE_DEFINE_EXPORTED_double(
     fraction_of_cuda_pinned_memory_to_use, 0.5,
     "Default use 50% of CPU memory as the pinned_memory for PaddlePaddle,"
     "reserve the rest for page tables, etc");

@@ -425,12 +450,13 @@ constexpr static float fraction_of_gpu_memory_to_use = 0.92f;
 // which may lead to insufficient memory left for paddle
 constexpr static float fraction_of_gpu_memory_to_use = 0.5f;
 #endif
-DEFINE_double(fraction_of_gpu_memory_to_use, fraction_of_gpu_memory_to_use,
-              "Allocate a trunk of gpu memory that is this fraction of the "
-              "total gpu memory size. Future memory usage will be allocated "
-              "from the trunk. If the trunk doesn't have enough gpu memory, "
-              "additional trunks of the same size will be requested from gpu "
-              "until the gpu has no memory left for another trunk.");
+PADDLE_DEFINE_EXPORTED_double(
+    fraction_of_gpu_memory_to_use, fraction_of_gpu_memory_to_use,
+    "Allocate a trunk of gpu memory that is this fraction of the "
+    "total gpu memory size. Future memory usage will be allocated "
+    "from the trunk. If the trunk doesn't have enough gpu memory, "
+    "additional trunks of the same size will be requested from gpu "
+    "until the gpu has no memory left for another trunk.");
 
 /**
  * Memory related FLAG

@@ -444,7 +470,7 @@ DEFINE_double(fraction_of_gpu_memory_to_use, fraction_of_gpu_memory_to_use,
  * FLAGS_reallocate_gpu_memory_in_mb will be requested from the GPU until
  * the GPU has no remaining memory.
  */
-DEFINE_uint64(
+PADDLE_DEFINE_EXPORTED_uint64(
     initial_gpu_memory_in_mb, 0ul,
     "Allocate a trunk of gpu memory whose byte size is specified by "
     "the flag. Future memory usage will be allocated from the "

@@ -466,18 +492,20 @@ DEFINE_uint64(
  * Note: If the allocated GPU memory blocks are exhausted,
  * additional GPU memory blocks are reallocated
  */
-DEFINE_uint64(reallocate_gpu_memory_in_mb, 0ul,
-              "If this flag is set, Paddle will reallocate the gpu memory with "
-              "size specified by this flag. Else Paddle will reallocate by "
-              "FLAGS_fraction_of_gpu_memory_to_use");
-DEFINE_uint64(gpu_memory_limit_mb, 0UL,
-              "The maximum gpu memory limit that the process can allocate. "
-              "If it is equal to 0, there would be no limit and all gpu memory "
-              "would be available to the process. If it is larger than 0, "
-              "the process would raise out of memory error if the allocated "
-              "memory exceeds the limit even though there is available "
-              "memory on the gpu card. The unit is MB and default value is 0.");
+PADDLE_DEFINE_EXPORTED_uint64(
+    reallocate_gpu_memory_in_mb, 0ul,
+    "If this flag is set, Paddle will reallocate the gpu memory with "
+    "size specified by this flag. Else Paddle will reallocate by "
+    "FLAGS_fraction_of_gpu_memory_to_use");
+
+PADDLE_DEFINE_EXPORTED_uint64(
+    gpu_memory_limit_mb, 0UL,
+    "The maximum gpu memory limit that the process can allocate. "
+    "If it is equal to 0, there would be no limit and all gpu memory "
+    "would be available to the process. If it is larger than 0, "
+    "the process would raise out of memory error if the allocated "
+    "memory exceeds the limit even though there is available "
+    "memory on the gpu card. The unit is MB and default value is 0.");
 #endif

@@ -489,11 +517,12 @@ DEFINE_uint64(gpu_memory_limit_mb, 0UL,
  * Example:
  * Note:
  */
-DEFINE_double(local_exe_sub_scope_limit, 256.0,  // MBytes
-              "The memory up limit of sub-scopes of local execution scope for "
-              "each CUDAPlace. If you don't need to limit the memory, "
-              "you should set FLAGS_local_exe_sub_scope_limit=-1. "
-              "The default value is 256 MBytes.");
+PADDLE_DEFINE_EXPORTED_double(
+    local_exe_sub_scope_limit, 256.0,  // MBytes
+    "The memory up limit of sub-scopes of local execution scope for "
+    "each CUDAPlace. If you don't need to limit the memory, "
+    "you should set FLAGS_local_exe_sub_scope_limit=-1. "
+    "The default value is 256 MBytes.");
 
 /**
  * MKLDNN related FLAG

@@ -503,7 +532,7 @@ DEFINE_double(local_exe_sub_scope_limit, 256.0,  // MBytes
  * Example:
  * Note:
  */
-DEFINE_bool(use_mkldnn, false, "Use MKLDNN to run");
+PADDLE_DEFINE_EXPORTED_bool(use_mkldnn, false, "Use MKLDNN to run");
 
 /**
  * Debug related FLAG

@@ -525,7 +554,7 @@ static const int32_t kDefaultCallStackLevel = 2;
 static const int32_t kDefaultCallStackLevel = 1;
 #endif
 
-DEFINE_int32(
+PADDLE_DEFINE_EXPORTED_int32(
     call_stack_level, kDefaultCallStackLevel,
     "Determine the call stack to print when error or exeception happens."
     // TODO(zhiqiu): implement logic of FLAGS_call_stack_level==0

@@ -545,9 +574,9 @@ DEFINE_int32(
  * Note: If True, gradients are summed by the reverse order of
  * the forward execution sequence.
  */
-DEFINE_bool(sort_sum_gradient, false,
-            "Sum gradients by the reverse order of "
-            "the forward execution sequence.");
+PADDLE_DEFINE_EXPORTED_bool(sort_sum_gradient, false,
+                            "Sum gradients by the reverse order of "
+                            "the forward execution sequence.");
 
 /**
  * Performance related FLAG

@@ -557,7 +586,7 @@ DEFINE_bool(sort_sum_gradient, false,
  * Example:
  * Note: The maximum number of inplace grad_add.
  */
-DEFINE_int32(
+PADDLE_DEFINE_EXPORTED_int32(
     max_inplace_grad_add, 0,
     "The maximum number of inplace grad_add. When doing "
     "gradient accumulation, if the number of gradients need to that "

@@ -572,8 +601,8 @@ DEFINE_int32(
  * Example:
  * Note: Holds list of operation types with OneDNN kernels to be enabled.
  */
-DEFINE_string(tracer_mkldnn_ops_on, "",
-              "List of OneDNN operation types to be turned on");
+PADDLE_DEFINE_EXPORTED_string(tracer_mkldnn_ops_on, "",
+                              "List of OneDNN operation types to be turned on");
 
 /**
  * Debug related FLAG

@@ -583,8 +612,9 @@ DEFINE_string(tracer_mkldnn_ops_on, "",
  * Example:
  * Note: Holds list of operation types with OneDNN kernels to be disabled.
  */
-DEFINE_string(tracer_mkldnn_ops_off, "",
-              "List of OneDNN operation types to be turned off");
+PADDLE_DEFINE_EXPORTED_string(
+    tracer_mkldnn_ops_off, "",
+    "List of OneDNN operation types to be turned off");
 
 /**
  * Debug related FLAG

@@ -595,8 +625,9 @@ DEFINE_string(tracer_mkldnn_ops_off, "",
  * Note: Check kernel launch status after every kernel compute.
  */
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-DEFINE_bool(check_kernel_launch, false,
-            "Check kernel launch status after every kernel compute");
+PADDLE_DEFINE_EXPORTED_bool(
+    check_kernel_launch, false,
+    "Check kernel launch status after every kernel compute");
 #endif

@@ -608,7 +639,8 @@ DEFINE_bool(check_kernel_launch, false,
  * Note: Disable cudnn in conv2d.
  */
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-DEFINE_bool(conv2d_disable_cudnn, false, "Disable cudnn in conv2d");
+PADDLE_DEFINE_EXPORTED_bool(conv2d_disable_cudnn, false,
+                            "Disable cudnn in conv2d");
 #endif

@@ -621,8 +653,8 @@ DEFINE_bool(conv2d_disable_cudnn, false, "Disable cudnn in conv2d");
  */
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_XPU) || \
     defined(PADDLE_WITH_ASCEND_CL) || defined(PADDLE_WITH_HIP)
-DEFINE_int32(get_host_by_name_time, 120,
-             "The maximum time for get host by name time");
+PADDLE_DEFINE_EXPORTED_int32(get_host_by_name_time, 120,
+                             "The maximum time for get host by name time");
 #endif

@@ -634,6 +666,6 @@ DEFINE_int32(get_host_by_name_time, 120,
  * program when using Fleet APIs.
  * Note: Apply IR pass to program. Be only useful when using Fleet APIs.
  */
-DEFINE_bool(
+PADDLE_DEFINE_EXPORTED_bool(
     apply_pass_to_program, false,
     "It controls whether to apply IR pass to program when using Fleet APIs");
paddle/fluid/platform/flags.h (new file, 0 → 100644)
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <cstdint>
#include <map>
#include <string>
#include <type_traits>
#include <typeindex>
#include "boost/variant.hpp"
#include "gflags/gflags.h"
#include "paddle/fluid/platform/macros.h"
namespace paddle {
namespace platform {

struct FlagInfo {
  using ValueType =
      boost::variant<bool, int32_t, int64_t, uint64_t, double, std::string>;
  std::string name;
  void *value_ptr;
  ValueType default_value;
  std::string doc;
  bool is_writable;
};

using ExportedFlagInfoMap = std::map<std::string, FlagInfo>;
const ExportedFlagInfoMap &GetExportedFlagInfoMap();
#define __PADDLE_DEFINE_EXPORTED_FLAG(__name, __is_writable, __cpp_type, \
__gflag_type, __default_value, __doc) \
DEFINE_##__gflag_type(__name, __default_value, __doc); \
struct __PaddleRegisterFlag_##__name { \
__PaddleRegisterFlag_##__name() { \
const auto &instance = ::paddle::platform::GetExportedFlagInfoMap(); \
using Type = ::paddle::platform::ExportedFlagInfoMap; \
auto &info = const_cast<Type &>(instance)[#__name]; \
info.name = #__name; \
info.value_ptr = &(FLAGS_##__name); \
info.default_value = static_cast<__cpp_type>(__default_value); \
info.doc = __doc; \
info.is_writable = __is_writable; \
} \
}; \
static_assert(std::is_same<__PaddleRegisterFlag_##__name, \
::__PaddleRegisterFlag_##__name>::value, \
"FLAGS should define in global namespace"); \
static __PaddleRegisterFlag_##__name __PaddleRegisterFlag_instance##__name
#define PADDLE_DEFINE_EXPORTED_bool(name, default_value, doc) \
__PADDLE_DEFINE_EXPORTED_FLAG(name, true, bool, bool, default_value, doc)
#define PADDLE_DEFINE_READONLY_EXPORTED_bool(name, default_value, doc) \
__PADDLE_DEFINE_EXPORTED_FLAG(name, false, bool, bool, default_value, doc)
#define PADDLE_DEFINE_EXPORTED_int32(name, default_value, doc) \
__PADDLE_DEFINE_EXPORTED_FLAG(name, true, int32_t, int32, default_value, doc)
#define PADDLE_DEFINE_EXPORTED_int64(name, default_value, doc) \
__PADDLE_DEFINE_EXPORTED_FLAG(name, true, int64_t, int64, default_value, doc)
#define PADDLE_DEFINE_EXPORTED_uint64(name, default_value, doc) \
__PADDLE_DEFINE_EXPORTED_FLAG(name, true, uint64_t, uint64, default_value, \
doc)
#define PADDLE_DEFINE_EXPORTED_double(name, default_value, doc) \
__PADDLE_DEFINE_EXPORTED_FLAG(name, true, double, double, default_value, doc)
#define PADDLE_DEFINE_EXPORTED_string(name, default_value, doc) \
__PADDLE_DEFINE_EXPORTED_FLAG(name, true, ::std::string, string, \
default_value, doc)
}  // namespace platform
}  // namespace paddle
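To make the header's mechanics concrete, here is a hedged, self-contained sketch (my_demo_flag is made up for illustration): the macro expands to a normal gflags definition plus a static registrar object whose constructor fills in the FlagInfo entry before main() runs, so the entry is observable at runtime.

#include <cassert>
#include "paddle/fluid/platform/flags.h"

// Defines FLAGS_my_demo_flag and registers it in the exported-flag map.
PADDLE_DEFINE_EXPORTED_bool(my_demo_flag, true, "A demo flag.");

int main() {
  const auto &map = paddle::platform::GetExportedFlagInfoMap();
  const auto &info = map.at("my_demo_flag");
  assert(info.is_writable);                       // writable variant
  assert(info.value_ptr == &FLAGS_my_demo_flag);  // points at the gflag
  assert(boost::get<bool>(info.default_value) == true);
  return 0;
}

Note also the static_assert in the macro: the registrar type must resolve to the same type in the global namespace, which forces every PADDLE_DEFINE_EXPORTED_* invocation to appear at global scope, exactly as the converted files below do.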
paddle/fluid/platform/init.cc

@@ -43,9 +43,10 @@ limitations under the License. */
 #endif
 
 DECLARE_int32(paddle_num_threads);
-DEFINE_int32(multiple_of_cupti_buffer_size, 1,
-             "Multiple of the CUPTI device buffer size. If the timestamps have "
-             "been dropped when you are profiling, try increasing this value.");
+PADDLE_DEFINE_EXPORTED_int32(
+    multiple_of_cupti_buffer_size, 1,
+    "Multiple of the CUPTI device buffer size. If the timestamps have "
+    "been dropped when you are profiling, try increasing this value.");
 
 namespace paddle {
 namespace platform {
paddle/fluid/platform/place.cc

@@ -14,11 +14,12 @@ limitations under the License. */
 #include "paddle/fluid/platform/place.h"
 
-DEFINE_bool(benchmark, false,
-            "Doing memory benchmark. It will make deleting scope synchronized, "
-            "and add some memory usage logs."
-            "Default cuda is asynchronous device, set to True will"
-            "force op run in synchronous mode.");
+PADDLE_DEFINE_EXPORTED_bool(
+    benchmark, false,
+    "Doing memory benchmark. It will make deleting scope synchronized, "
+    "and add some memory usage logs."
+    "Default cuda is asynchronous device, set to True will"
+    "force op run in synchronous mode.");
 
 namespace paddle {
 namespace platform {
paddle/fluid/platform/profiler.cc

@@ -24,7 +24,8 @@ limitations under the License. */
 #include "paddle/fluid/platform/dynload/nvtx.h"
 #endif
 
-DEFINE_bool(enable_rpc_profiler, false, "Enable rpc profiler or not.");
+PADDLE_DEFINE_EXPORTED_bool(enable_rpc_profiler, false,
+                            "Enable rpc profiler or not.");
 
 namespace paddle {
 namespace platform {
paddle/fluid/platform/xpu/xpu_info.cc

@@ -18,14 +18,15 @@ limitations under the License. */
 #include "paddle/fluid/platform/xpu/xpu_header.h"
 #include "paddle/fluid/string/split.h"
 
-DEFINE_string(selected_xpus, "",
-              "A list of device ids separated by comma, like: 0,1,2,3. "
-              "This option is useful when doing multi process training and "
-              "each process have only one device (XPU). If you want to use "
-              "all visible devices, set this to empty string. NOTE: the "
-              "reason of doing this is that we want to use P2P communication"
-              "between XPU devices, use XPU_VISIBLE_DEVICES can only use"
-              "share-memory only.");
+PADDLE_DEFINE_EXPORTED_string(
+    selected_xpus, "",
+    "A list of device ids separated by comma, like: 0,1,2,3. "
+    "This option is useful when doing multi process training and "
+    "each process have only one device (XPU). If you want to use "
+    "all visible devices, set this to empty string. NOTE: the "
+    "reason of doing this is that we want to use P2P communication"
+    "between XPU devices, use XPU_VISIBLE_DEVICES can only use"
+    "share-memory only.");
 
 namespace paddle {
 namespace platform {
paddle/fluid/pybind/global_value_getter_setter.cc
浏览文件 @
ca0136a6
...
@@ -29,82 +29,8 @@
...
@@ -29,82 +29,8 @@
#include "paddle/fluid/platform/macros.h"
#include "paddle/fluid/platform/macros.h"
#include "pybind11/stl.h"
#include "pybind11/stl.h"
-// data processing
-DECLARE_bool(use_mkldnn);
-DECLARE_string(tracer_mkldnn_ops_on);
-DECLARE_string(tracer_mkldnn_ops_off);
-// debug
-DECLARE_bool(check_nan_inf);
-DECLARE_bool(cpu_deterministic);
-DECLARE_bool(enable_rpc_profiler);
-DECLARE_int32(multiple_of_cupti_buffer_size);
-DECLARE_bool(reader_queue_speed_test_mode);
-DECLARE_int32(call_stack_level);
-DECLARE_bool(sort_sum_gradient);
-DECLARE_bool(check_kernel_launch);
-// device management
-DECLARE_int32(paddle_num_threads);
-// executor
-DECLARE_bool(enable_parallel_graph);
-DECLARE_string(pe_profile_fname);
-DECLARE_string(print_sub_graph_dir);
-DECLARE_bool(use_ngraph);
-// memory management
-DECLARE_string(allocator_strategy);
-DECLARE_double(eager_delete_tensor_gb);
-DECLARE_double(fraction_of_cpu_memory_to_use);
-DECLARE_bool(free_idle_chunk);
-DECLARE_bool(free_when_no_cache_hit);
-DECLARE_int32(fuse_parameter_groups_size);
-DECLARE_double(fuse_parameter_memory_size);
-DECLARE_bool(init_allocated_mem);
-DECLARE_uint64(initial_cpu_memory_in_mb);
-DECLARE_double(memory_fraction_of_eager_deletion);
-DECLARE_bool(use_pinned_memory);
-DECLARE_bool(use_system_allocator);
-// others
-DECLARE_bool(benchmark);
-DECLARE_int32(inner_op_parallelism);
-DECLARE_int32(max_inplace_grad_add);
-DECLARE_string(tracer_profile_fname);
-DECLARE_bool(apply_pass_to_program);
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-// cudnn
-DECLARE_uint64(conv_workspace_size_limit);
-DECLARE_bool(cudnn_batchnorm_spatial_persistent);
-DECLARE_bool(cudnn_deterministic);
-DECLARE_bool(cudnn_exhaustive_search);
-DECLARE_bool(conv2d_disable_cudnn);
-// data processing
-DECLARE_bool(enable_cublas_tensor_op_math);
-// device management
-DECLARE_string(selected_gpus);
-// memory management
-DECLARE_bool(eager_delete_scope);
-DECLARE_bool(fast_eager_deletion_mode);
-DECLARE_double(fraction_of_cuda_pinned_memory_to_use);
-DECLARE_double(fraction_of_gpu_memory_to_use);
-DECLARE_uint64(gpu_memory_limit_mb);
-DECLARE_uint64(initial_gpu_memory_in_mb);
-DECLARE_uint64(reallocate_gpu_memory_in_mb);
-// others
-DECLARE_bool(sync_nccl_allreduce);
-#endif
-#ifdef PADDLE_WITH_XPU
-// device management
-DECLARE_string(selected_xpus);
-#endif
-#ifdef PADDLE_WITH_ASCEND_CL
-// device management
-DECLARE_string(selected_npus);
-// set minmum loss scaling value
-DECLARE_int32(min_loss_scaling);
-#endif
+// NOTE: where is these 2 flags from?
 #ifdef PADDLE_WITH_DISTRIBUTE
-DECLARE_int32(rpc_send_thread_num);
 DECLARE_int32(rpc_get_thread_num);
 DECLARE_int32(rpc_prefetch_thread_num);
 #endif
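The mass deletion above is what the exported-flag registry buys: the pybind layer no longer needs one DECLARE_<type> per flag, because it can walk platform::GetExportedFlagInfoMap() instead (see the loop added at the end of this file). Inferring only from the fields that loop reads (name, value_ptr, is_writable, default_value), one map entry looks roughly like the sketch below; the struct name and the exact variant alternatives are guesses, not the real definition in flags.h:

#include <cstdint>
#include <string>
#include "boost/variant.hpp"

struct ExportedFlagInfoSketch {
  std::string name;       // e.g. "eager_delete_tensor_gb"
  const void *value_ptr;  // type-erased address of FLAGS_<name>
  bool is_writable;       // whether a Python-side setter is registered
  // The default value keeps its static type, so a boost visitor can
  // recover T and cast value_ptr back to the right pointer type.
  boost::variant<bool, int32_t, int64_t, uint32_t, uint64_t, double,
                 std::string>
      default_value;
};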
...
@@ -181,7 +107,6 @@ class PYBIND11_HIDDEN GlobalVarGetterSetterRegistry {
     PADDLE_ENFORCE_NOT_NULL(setter,
                             platform::errors::InvalidArgument(
                                 "Setter of %s should not be null", name));
     var_infos_.insert({name, VarInfo(is_public, getter, setter)});
   }
...
@@ -243,81 +168,6 @@ class PYBIND11_HIDDEN GlobalVarGetterSetterRegistry {
 GlobalVarGetterSetterRegistry GlobalVarGetterSetterRegistry::instance_;

-class GlobalVarGetterSetterRegistryHelper {
- public:
-  GlobalVarGetterSetterRegistryHelper(bool is_public, bool is_writable,
-                                      const std::string &var_names)
-      : is_public_(is_public),
-        is_writable_(is_writable),
-        var_names_(SplitVarNames(var_names)) {}
-
-  template <typename... Args>
-  void Register(Args &&... args) const {
-    Impl<0, sizeof...(args) == 1, Args...>::Register(
-        is_public_, is_writable_, var_names_, std::forward<Args>(args)...);
-  }
-
- private:
-  static std::vector<std::string> SplitVarNames(const std::string &names) {
-    auto valid_char = [](char ch) { return !std::isspace(ch) && ch != ','; };
-
-    std::vector<std::string> ret;
-    size_t i = 0, j = 0, n = names.size();
-    while (i < n) {
-      for (; i < n && !valid_char(names[i]); ++i) {
-      }
-      for (j = i + 1; j < n && valid_char(names[j]); ++j) {
-      }
-      if (i < n && j <= n) {
-        auto substring = names.substr(i, j - i);
-        VLOG(10) << "Get substring: \"" << substring << "\"";
-        ret.emplace_back(substring);
-      }
-      i = j + 1;
-    }
-    return ret;
-  }
-
- private:
-  template <size_t kIdx, bool kIsStop, typename T, typename... Args>
-  struct Impl {
-    static void Register(bool is_public, bool is_writable,
-                         const std::vector<std::string> &var_names, T &&var,
-                         Args &&... args) {
-      PADDLE_ENFORCE_EQ(kIdx + 1 + sizeof...(args), var_names.size(),
-                        platform::errors::InvalidArgument(
-                            "Argument number not match name number"));
-      Impl<kIdx, true, T>::Register(is_public, is_writable, var_names, var);
-      Impl<kIdx + 1, sizeof...(Args) == 1, Args...>::Register(
-          is_public, is_writable, var_names, std::forward<Args>(args)...);
-    }
-  };
-
-  template <size_t kIdx, typename T>
-  struct Impl<kIdx, true, T> {
-    static void Register(bool is_public, bool is_writable,
-                         const std::vector<std::string> &var_names, T &&var) {
-      auto *instance = GlobalVarGetterSetterRegistry::MutableInstance();
-      if (is_writable) {
-        instance->Register(
-            var_names[kIdx], is_public,
-            GlobalVarGetterSetterRegistry::CreateGetter(std::forward<T>(var)),
-            GlobalVarGetterSetterRegistry::CreateSetter(&var));
-      } else {
-        instance->Register(
-            var_names[kIdx], is_public,
-            GlobalVarGetterSetterRegistry::CreateGetter(std::forward<T>(var)));
-      }
-    }
-  };
-
- private:
-  const bool is_public_;
-  const bool is_writable_;
-  const std::vector<std::string> var_names_;
-};
-
 static void RegisterGlobalVarGetterSetter();

 void BindGlobalValueGetterSetter(pybind11::module *module) {
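For reference, the machinery deleted above existed to pair each flag value with its name at compile time: "" #__VA_ARGS__ stringizes the macro argument list, SplitVarNames() splits that string on commas and whitespace, and the recursive Impl<> specializations register the k-th name with the k-th value. The stringize trick in miniature (self-contained; SHOW_NAMES and the FLAGS_a/FLAGS_b variables are invented):

#include <iostream>

// "" #__VA_ARGS__ turns the argument *tokens* into one string literal.
#define SHOW_NAMES(...) (std::cout << "" #__VA_ARGS__ << std::endl)

int main() {
  int FLAGS_a = 1;
  double FLAGS_b = 2.0;
  (void)FLAGS_a;  // silence unused-variable warnings
  (void)FLAGS_b;
  SHOW_NAMES(FLAGS_a, FLAGS_b);  // prints: FLAGS_a, FLAGS_b
  return 0;
}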
...
@@ -338,65 +188,51 @@ void BindGlobalValueGetterSetter(pybind11::module *module) {
 }

 /* Public vars are designed to be writable. */
-#define REGISTER_PUBLIC_GLOBAL_VAR(...)                                        \
-  do {                                                                         \
-    GlobalVarGetterSetterRegistryHelper(/*is_public=*/true,                    \
-                                        /*is_writable=*/true, "" #__VA_ARGS__) \
-        .Register(__VA_ARGS__);                                                \
-  } while (0)
-
-#define REGISTER_PRIVATE_GLOBAL_VAR(is_writable, ...)                 \
-  do {                                                                \
-    GlobalVarGetterSetterRegistryHelper(/*is_public=*/false,          \
-                                        is_writable, "" #__VA_ARGS__) \
-        .Register(__VA_ARGS__);                                       \
-  } while (0)
+#define REGISTER_PUBLIC_GLOBAL_VAR(var)                                    \
+  do {                                                                     \
+    auto *instance = GlobalVarGetterSetterRegistry::MutableInstance();     \
+    instance->Register(#var, /*is_public=*/true,                           \
+                       GlobalVarGetterSetterRegistry::CreateGetter(var),   \
+                       GlobalVarGetterSetterRegistry::CreateSetter(&var)); \
+  } while (0)
+
+struct RegisterGetterSetterVisitor : public boost::static_visitor<void> {
+  RegisterGetterSetterVisitor(const std::string &name, bool is_public,
+                              void *value_ptr)
+      : name_(name), is_public_(is_public), value_ptr_(value_ptr) {}
+
+  template <typename T>
+  void operator()(const T &) const {
+    auto &value = *static_cast<T *>(value_ptr_);
+    auto *instance = GlobalVarGetterSetterRegistry::MutableInstance();
+    instance->Register(name_, is_public_,
+                       GlobalVarGetterSetterRegistry::CreateGetter(value),
+                       GlobalVarGetterSetterRegistry::CreateSetter(&value));
+  }
+
+ private:
+  std::string name_;
+  bool is_public_;
+  void *value_ptr_;
+};

 static void RegisterGlobalVarGetterSetter() {
-  REGISTER_PRIVATE_GLOBAL_VAR(/*is_writable=*/false, FLAGS_free_idle_chunk,
-                              FLAGS_free_when_no_cache_hit);
-
-  REGISTER_PUBLIC_GLOBAL_VAR(
-      FLAGS_eager_delete_tensor_gb, FLAGS_enable_parallel_graph,
-      FLAGS_allocator_strategy, FLAGS_use_system_allocator, FLAGS_check_nan_inf,
-      FLAGS_call_stack_level, FLAGS_sort_sum_gradient, FLAGS_cpu_deterministic,
-      FLAGS_enable_rpc_profiler, FLAGS_multiple_of_cupti_buffer_size,
-      FLAGS_reader_queue_speed_test_mode, FLAGS_pe_profile_fname,
-      FLAGS_print_sub_graph_dir, FLAGS_fraction_of_cpu_memory_to_use,
-      FLAGS_fuse_parameter_groups_size, FLAGS_fuse_parameter_memory_size,
-      FLAGS_init_allocated_mem, FLAGS_initial_cpu_memory_in_mb,
-      FLAGS_memory_fraction_of_eager_deletion, FLAGS_use_pinned_memory,
-      FLAGS_benchmark, FLAGS_inner_op_parallelism, FLAGS_tracer_profile_fname,
-      FLAGS_paddle_num_threads, FLAGS_use_mkldnn, FLAGS_max_inplace_grad_add,
-      FLAGS_tracer_mkldnn_ops_on, FLAGS_tracer_mkldnn_ops_off,
-      FLAGS_apply_pass_to_program);
-
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-  REGISTER_PUBLIC_GLOBAL_VAR(
-      FLAGS_gpu_memory_limit_mb, FLAGS_cudnn_deterministic,
-      FLAGS_conv_workspace_size_limit, FLAGS_cudnn_batchnorm_spatial_persistent,
-      FLAGS_cudnn_exhaustive_search, FLAGS_eager_delete_scope,
-      FLAGS_fast_eager_deletion_mode,
-      FLAGS_fraction_of_cuda_pinned_memory_to_use,
-      FLAGS_fraction_of_gpu_memory_to_use, FLAGS_initial_gpu_memory_in_mb,
-      FLAGS_reallocate_gpu_memory_in_mb, FLAGS_enable_cublas_tensor_op_math,
-      FLAGS_selected_gpus, FLAGS_sync_nccl_allreduce,
-      FLAGS_conv2d_disable_cudnn, FLAGS_check_kernel_launch);
-#endif
-#ifdef PADDLE_WITH_XPU
-  REGISTER_PUBLIC_GLOBAL_VAR(FLAGS_selected_xpus);
-#endif
-#ifdef PADDLE_WITH_ASCEND_CL
-  REGISTER_PUBLIC_GLOBAL_VAR(FLAGS_selected_npus);
-  REGISTER_PUBLIC_GLOBAL_VAR(FLAGS_min_loss_scaling);
-#endif
-
 #ifdef PADDLE_WITH_DITRIBUTE
-  REGISTER_PUBLIC_GLOBAL_VAR(FLAGS_rpc_send_thread_num,
-                             FLAGS_rpc_get_thread_num,
-                             FLAGS_rpc_prefetch_thread_num);
+  REGISTER_PUBLIC_GLOBAL_VAR(FLAGS_rpc_get_thread_num);
+  REGISTER_PUBLIC_GLOBAL_VAR(FLAGS_rpc_prefetch_thread_num);
 #endif
+
+  const auto &flag_map = platform::GetExportedFlagInfoMap();
+  for (const auto &pair : flag_map) {
+    const std::string &name = pair.second.name;
+    bool is_writable = pair.second.is_writable;
+    void *value_ptr = const_cast<void *>(pair.second.value_ptr);
+    const auto &default_value = pair.second.default_value;
+    RegisterGetterSetterVisitor visitor("FLAGS_" + name, is_writable,
+                                        value_ptr);
+    boost::apply_visitor(visitor, default_value);
+  }
 }
 }  // namespace pybind
 }  // namespace paddle
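The visitor is the piece that turns the type-erased map entries back into typed getters and setters: the boost::variant holding a flag's default value still carries the flag's static type, so boost::apply_visitor instantiates operator() with the right T, and the void* can then be cast back safely. The same dispatch in a self-contained sketch (requires Boost; all names invented):

#include <iostream>
#include <string>
#include "boost/variant.hpp"

struct PrintTypedValue : public boost::static_visitor<void> {
  explicit PrintTypedValue(void *ptr) : ptr_(ptr) {}

  // Instantiated once per variant alternative; T is the flag's real type.
  template <typename T>
  void operator()(const T & /*default_value*/) const {
    std::cout << *static_cast<T *>(ptr_) << std::endl;
  }

 private:
  void *ptr_;
};

int main() {
  double flag_value = 0.7;  // stands in for some FLAGS_ variable
  boost::variant<bool, double, std::string> default_value = 0.5;
  // The double alternative is active, so operator()<double> runs and the
  // void* is cast back to double*, printing 0.7.
  PrintTypedValue visitor(&flag_value);
  boost::apply_visitor(visitor, default_value);
  return 0;
}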
paddle/fluid/pybind/reader_py.cc
View file @ ca0136a6
...
@@ -32,9 +32,10 @@
 #include "paddle/fluid/platform/place.h"
 #include "pybind11/stl.h"
-DEFINE_bool(reader_queue_speed_test_mode, false,
-            "If set true, the queue.pop will only get data from queue but not "
-            "remove the data from queue for speed testing");
+PADDLE_DEFINE_EXPORTED_bool(
+    reader_queue_speed_test_mode, false,
+    "If set true, the queue.pop will only get data from queue but not "
+    "remove the data from queue for speed testing");
 namespace paddle {
 namespace pybind {
...
python/paddle/fluid/__init__.py
View file @ ca0136a6
...
@@ -176,83 +176,23 @@ def __bootstrap__():
         print('PLEASE USE OMP_NUM_THREADS WISELY.', file=sys.stderr)

     os.environ['OMP_NUM_THREADS'] = str(num_threads)
-    sysstr = platform.system()
-    read_env_flags = [
-        'check_nan_inf',
-        'convert_all_blocks',
-        'benchmark',
-        'eager_delete_scope',
-        'fraction_of_cpu_memory_to_use',
-        'initial_cpu_memory_in_mb',
-        'init_allocated_mem',
-        'paddle_num_threads',
-        'dist_threadpool_size',
-        'eager_delete_tensor_gb',
-        'fast_eager_deletion_mode',
-        'memory_fraction_of_eager_deletion',
-        'allocator_strategy',
-        'reader_queue_speed_test_mode',
-        'print_sub_graph_dir',
-        'pe_profile_fname',
-        'inner_op_parallelism',
-        'enable_parallel_graph',
-        'fuse_parameter_groups_size',
-        'multiple_of_cupti_buffer_size',
-        'fuse_parameter_memory_size',
-        'tracer_profile_fname',
-        'dygraph_debug',
-        'use_system_allocator',
-        'enable_unused_var_check',
-        'free_idle_chunk',
-        'free_when_no_cache_hit',
-        'call_stack_level',
-        'sort_sum_gradient',
-        'max_inplace_grad_add',
-        'apply_pass_to_program',
-        'new_executor_use_inplace',
-    ]
-    if 'Darwin' not in sysstr:
-        read_env_flags.append('use_pinned_memory')
-
-    if os.name != 'nt':
-        read_env_flags.append('cpu_deterministic')
-
-    if core.is_compiled_with_mkldnn():
-        read_env_flags.append('use_mkldnn')
-        read_env_flags.append('tracer_mkldnn_ops_on')
-        read_env_flags.append('tracer_mkldnn_ops_off')
-
-    if core.is_compiled_with_cuda():
-        read_env_flags += [
-            'fraction_of_gpu_memory_to_use',
-            'initial_gpu_memory_in_mb',
-            'reallocate_gpu_memory_in_mb',
-            'cudnn_deterministic',
-            'enable_cublas_tensor_op_math',
-            'conv_workspace_size_limit',
-            'cudnn_exhaustive_search',
-            'selected_gpus',
-            'sync_nccl_allreduce',
-            'cudnn_batchnorm_spatial_persistent',
-            'gpu_allocator_retry_time',
-            'local_exe_sub_scope_limit',
-            'gpu_memory_limit_mb',
-            'conv2d_disable_cudnn',
-            'get_host_by_name_time',
-        ]
-
-    if core.is_compiled_with_npu():
-        read_env_flags += [
-            'selected_npus',
-            'fraction_of_gpu_memory_to_use',
-            'initial_gpu_memory_in_mb',
-            'reallocate_gpu_memory_in_mb',
-            'gpu_memory_limit_mb',
-            'npu_config_path',
-            'get_host_by_name_time',
-            'hccl_check_nan',
-            'min_loss_scaling',
-        ]
+    flag_prefix = 'FLAGS_'
+    read_env_flags = [
+        key[len(flag_prefix):] for key in core.globals().keys()
+        if key.startswith(flag_prefix)
+    ]
+
+    def remove_flag_if_exists(name):
+        if name in read_env_flags:
+            read_env_flags.remove(name)
+
+    sysstr = platform.system()
+    if 'Darwin' in sysstr:
+        remove_flag_if_exists('use_pinned_memory')
+
+    if os.name == 'nt':
+        remove_flag_if_exists('cpu_deterministic')

     core.init_gflags(["--tryfromenv=" + ",".join(read_env_flags)])
     # Note(zhouwei25): sys may not have argv in some cases,
...
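The net effect of this last change: any flag defined with PADDLE_DEFINE_EXPORTED_* appears in core.globals() and is picked up from the environment during import, with no hand-maintained list. A usage sketch, assuming a build that contains this commit; FLAGS_eager_delete_tensor_gb is just one example of an exported flag, and paddle.get_flags is the public query API:

import os

# Must be set before paddle is imported, since __bootstrap__ reads the
# environment via --tryfromenv during import.
os.environ['FLAGS_eager_delete_tensor_gb'] = '0.0'

import paddle

# paddle.get_flags returns a dict of current flag values.
print(paddle.get_flags(['FLAGS_eager_delete_tensor_gb']))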