PaddlePaddle / Paddle
Commit ca0136a6
Author: sneaxiy
Date: Sep 16, 2021
Parent: e93c18a3

make flag setter easier
Showing 28 changed files with 375 additions and 462 deletions.
paddle/fluid/framework/details/reduce_op_handle.cc  +1 -1
paddle/fluid/framework/ir/coalesce_grad_tensor_pass.cc  +8 -7
paddle/fluid/framework/ir/graph.cc  +2 -2
paddle/fluid/framework/ir/graph_helper.cc  +3 -3
paddle/fluid/framework/operator.cc  +2 -1
paddle/fluid/framework/parallel_executor.cc  +7 -5
paddle/fluid/framework/scope.cc  +1 -1
paddle/fluid/framework/unused_var_check.cc  +5 -4
paddle/fluid/imperative/CMakeLists.txt  +1 -1
paddle/fluid/imperative/flags.cc  +4 -4
paddle/fluid/imperative/profiler.cc  +2 -2
paddle/fluid/memory/allocation/CMakeLists.txt  +1 -1
paddle/fluid/memory/allocation/allocator_facade.cc  +5 -4
paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc  +15 -12
paddle/fluid/memory/allocation/naive_best_fit_allocator.cc  +7 -6
paddle/fluid/operators/pscore/heter_listen_and_serv_op.cc  +2 -1
paddle/fluid/platform/CMakeLists.txt  +2 -2
paddle/fluid/platform/cpu_info.cc  +6 -3
paddle/fluid/platform/enforce.h  +1 -0
paddle/fluid/platform/flags.cc  +139 -107
paddle/fluid/platform/flags.h  +85 -0
paddle/fluid/platform/init.cc  +4 -3
paddle/fluid/platform/place.cc  +6 -5
paddle/fluid/platform/profiler.cc  +2 -1
paddle/fluid/platform/xpu/xpu_info.cc  +9 -8
paddle/fluid/pybind/global_value_getter_setter.cc  +39 -203
paddle/fluid/pybind/reader_py.cc  +4 -3
python/paddle/fluid/__init__.py  +12 -72
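
The recurring change across the C++ sources below is mechanical: every gflags DEFINE_<type>(...) for a flag that should be visible from Python is replaced by the PADDLE_DEFINE_EXPORTED_<type>(...) macro introduced in paddle/fluid/platform/flags.h. As an illustration only (this excerpt simply mirrors the reduce_op_handle.cc hunk shown below), the conversion looks like this:

// Before: a plain gflags definition; the flag is invisible to any registry.
DEFINE_bool(
    cpu_deterministic, false,
    "Whether to make the result of computation deterministic in CPU side.");

// After: the same flag, now also self-registered into the exported-flag map
// declared in paddle/fluid/platform/flags.h.
PADDLE_DEFINE_EXPORTED_bool(
    cpu_deterministic, false,
    "Whether to make the result of computation deterministic in CPU side.");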
paddle/fluid/framework/details/reduce_op_handle.cc
@@ -19,7 +19,7 @@
 #include "paddle/fluid/framework/details/variable_visitor.h"
 #include "paddle/fluid/platform/profiler.h"
-DEFINE_bool(
+PADDLE_DEFINE_EXPORTED_bool(
     cpu_deterministic, false,
     "Whether to make the result of computation deterministic in CPU side.");

paddle/fluid/framework/ir/coalesce_grad_tensor_pass.cc
@@ -25,13 +25,14 @@ class VarDesc;
 }  // namespace framework
 }  // namespace paddle
-DEFINE_double(fuse_parameter_memory_size, -1.0,  // MBytes
-              "fuse_parameter_memory_size is up limited memory size(MB)"
-              "of one group parameters' gradient which is the input "
-              "of communication calling(e.g NCCLAllReduce). "
-              "The default value is 0, it means that "
-              "not set group according to memory_size.");
-DEFINE_int32(
+PADDLE_DEFINE_EXPORTED_double(
+    fuse_parameter_memory_size, -1.0,  // MBytes
+    "fuse_parameter_memory_size is up limited memory size(MB)"
+    "of one group parameters' gradient which is the input "
+    "of communication calling(e.g NCCLAllReduce). "
+    "The default value is 0, it means that "
+    "not set group according to memory_size.");
+PADDLE_DEFINE_EXPORTED_int32(
     fuse_parameter_groups_size, 1,
     "fuse_parameter_groups_size is the up limited size of one group "
     "parameters' gradient. "

paddle/fluid/framework/ir/graph.cc
@@ -17,8 +17,8 @@ limitations under the License. */
 #include "paddle/fluid/framework/ir/graph.h"
 #include "paddle/fluid/framework/operator.h"
-DEFINE_bool(convert_all_blocks, true,
-            "Convert all blocks in program into SSAgraphs");
+PADDLE_DEFINE_EXPORTED_bool(convert_all_blocks, true,
+                            "Convert all blocks in program into SSAgraphs");
 namespace paddle {
 namespace framework {

paddle/fluid/framework/ir/graph_helper.cc
@@ -18,9 +18,9 @@ limitations under the License. */
 #include "paddle/fluid/framework/op_proto_maker.h"
 DECLARE_bool(convert_all_blocks);
-DEFINE_string(print_sub_graph_dir, "",
-              "FLAGS_print_sub_graph_dir is used "
-              "to print the nodes of sub_graphs.");
+PADDLE_DEFINE_EXPORTED_string(print_sub_graph_dir, "",
+                              "FLAGS_print_sub_graph_dir is used "
+                              "to print the nodes of sub_graphs.");
 namespace paddle {
 namespace framework {

paddle/fluid/framework/operator.cc
@@ -47,7 +47,8 @@ class LoDTensor;
 DECLARE_bool(benchmark);
 DECLARE_bool(check_nan_inf);
 DECLARE_bool(enable_unused_var_check);
-DEFINE_int32(inner_op_parallelism, 0, "number of threads for inner op");
+PADDLE_DEFINE_EXPORTED_int32(inner_op_parallelism, 0,
+                             "number of threads for inner op");
 namespace paddle {
 namespace framework {

paddle/fluid/framework/parallel_executor.cc
@@ -46,11 +46,13 @@ DECLARE_double(eager_delete_tensor_gb);
 #ifdef WITH_GPERFTOOLS
 #include "gperftools/profiler.h"
 #endif
-DEFINE_string(pe_profile_fname, "",
-              "Profiler filename for PE, which generated by gperftools."
-              "Only valid when compiled `WITH_PRIFILER=ON`. Empty if disable.");
-DEFINE_bool(enable_parallel_graph, false,
-            "Force disable parallel graph execution mode if set false.");
+PADDLE_DEFINE_EXPORTED_string(
+    pe_profile_fname, "",
+    "Profiler filename for PE, which generated by gperftools."
+    "Only valid when compiled `WITH_PRIFILER=ON`. Empty if disable.");
+PADDLE_DEFINE_EXPORTED_bool(
+    enable_parallel_graph, false,
+    "Force disable parallel graph execution mode if set false.");
 namespace paddle {
 namespace framework {

paddle/fluid/framework/scope.cc
@@ -19,7 +19,7 @@ limitations under the License. */
 DECLARE_bool(benchmark);
-DEFINE_bool(
+PADDLE_DEFINE_EXPORTED_bool(
     eager_delete_scope, true,
     "Delete local scope eagerly. It will reduce GPU memory usage but "
     "slow down the destruction of variables.(around 1% performance harm)");

paddle/fluid/framework/unused_var_check.cc
@@ -17,15 +17,16 @@ limitations under the License. */
 #include <glog/logging.h>
 #include <string>
-#include "gflags/gflags.h"
 #include "paddle/fluid/framework/no_need_buffer_vars_inference.h"
 #include "paddle/fluid/framework/op_info.h"
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/platform/enforce.h"
+#include "paddle/fluid/platform/flags.h"
-DEFINE_bool(enable_unused_var_check, false,
-            "Checking whether operator contains unused inputs, "
-            "especially for grad operator. It should be in unittest.");
+PADDLE_DEFINE_EXPORTED_bool(
+    enable_unused_var_check, false,
+    "Checking whether operator contains unused inputs, "
+    "especially for grad operator. It should be in unittest.");
 namespace paddle {
 namespace framework {

paddle/fluid/imperative/CMakeLists.txt
@@ -11,7 +11,7 @@ cc_library(amp SRCS amp_auto_cast.cc DEPS layer )
 cc_library(tracer SRCS tracer.cc DEPS layer engine program_desc_tracer amp denormal)
 cc_library(basic_engine SRCS basic_engine.cc DEPS layer gradient_accumulator)
 cc_library(engine SRCS basic_engine.cc partial_grad_engine.cc DEPS layer gradient_accumulator)
-cc_library(imperative_profiler SRCS profiler.cc)
+cc_library(imperative_profiler SRCS profiler.cc DEPS flags)
 if(NOT WIN32)
     if(WITH_NCCL OR WITH_RCCL)
         cc_library(imperative_all_reduce SRCS all_reduce.cc DEPS collective_helper device_context selected_rows tensor)

paddle/fluid/imperative/flags.cc
@@ -13,11 +13,11 @@
 // limitations under the License.
 #include "paddle/fluid/imperative/flags.h"
-#include "gflags/gflags.h"
+#include "paddle/fluid/platform/flags.h"
-DEFINE_uint64(dygraph_debug, 0,
-              "Debug level of dygraph. This flag is not "
-              "open to users");
+PADDLE_DEFINE_EXPORTED_uint64(dygraph_debug, 0,
+                              "Debug level of dygraph. This flag is not "
+                              "open to users");
 namespace paddle {
 namespace imperative {

paddle/fluid/imperative/profiler.cc
@@ -19,9 +19,9 @@
 #endif
 #include <glog/logging.h>
 #include <mutex>  // NOLINT
-#include "gflags/gflags.h"
+#include "paddle/fluid/platform/flags.h"
-DEFINE_string(
+PADDLE_DEFINE_EXPORTED_string(
     tracer_profile_fname, "xxgperf",
     "Profiler filename for imperative tracer, which generated by gperftools."
     "Only valid when compiled `WITH_PROFILER=ON`. Empty if disable.");

paddle/fluid/memory/allocation/CMakeLists.txt
@@ -99,7 +99,7 @@ cc_test(allocator_facade_abs_flags_test SRCS allocator_facade_abs_flags_test.cc
 cc_test(allocator_facade_frac_flags_test SRCS allocator_facade_frac_flags_test.cc DEPS allocator_facade)
-cc_library(auto_growth_best_fit_allocator SRCS auto_growth_best_fit_allocator.cc DEPS allocator aligned_allocator)
+cc_library(auto_growth_best_fit_allocator SRCS auto_growth_best_fit_allocator.cc DEPS allocator aligned_allocator flags)
 cc_test(auto_growth_best_fit_allocator_facade_test SRCS auto_growth_best_fit_allocator_facade_test.cc DEPS cpu_allocator auto_growth_best_fit_allocator)
 cc_test(auto_growth_best_fit_allocator_test SRCS auto_growth_best_fit_allocator_test.cc DEPS auto_growth_best_fit_allocator)

paddle/fluid/memory/allocation/allocator_facade.cc
@@ -37,14 +37,15 @@
 #endif
 #include "paddle/fluid/platform/npu_info.h"
-DEFINE_int64(
+PADDLE_DEFINE_EXPORTED_int64(
     gpu_allocator_retry_time, 10000,
     "The retry time (milliseconds) when allocator fails "
     "to allocate memory. No retry if this value is not greater than 0");
-DEFINE_bool(use_system_allocator, false,
-            "Whether to use system allocator to allocate CPU and GPU memory. "
-            "Only used for unittests.");
+PADDLE_DEFINE_EXPORTED_bool(
+    use_system_allocator, false,
+    "Whether to use system allocator to allocate CPU and GPU memory. "
+    "Only used for unittests.");
 namespace paddle {
 namespace memory {

paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc
@@ -17,18 +17,21 @@
 #include <algorithm>
 #include <mutex>  // NOLINT
 #include "paddle/fluid/memory/allocation/aligned_allocator.h"
+#include "paddle/fluid/platform/flags.h"
-DEFINE_bool(free_idle_chunk, false,
-            "Whether to free idle chunk when each allocation is freed. "
-            "If false, all freed allocation would be cached to speed up next "
-            "allocation request. If true, no allocation would be cached. This "
-            "flag only works when FLAGS_allocator_strategy=auto_growth.");
+PADDLE_DEFINE_READONLY_EXPORTED_bool(
+    free_idle_chunk, false,
+    "Whether to free idle chunk when each allocation is freed. "
+    "If false, all freed allocation would be cached to speed up next "
+    "allocation request. If true, no allocation would be cached. This "
+    "flag only works when FLAGS_allocator_strategy=auto_growth.");
-DEFINE_bool(free_when_no_cache_hit, false,
-            "Whether to free idle chunks when no cache hit. If true, idle "
-            "chunk would be freed when no cache hit; if false, idle "
-            "chunk would be freed when out of memory occurs. This flag "
-            "only works when FLAGS_allocator_strategy=auto_growth.");
+PADDLE_DEFINE_READONLY_EXPORTED_bool(
+    free_when_no_cache_hit, false,
+    "Whether to free idle chunks when no cache hit. If true, idle "
+    "chunk would be freed when no cache hit; if false, idle "
+    "chunk would be freed when out of memory occurs. This flag "
+    "only works when FLAGS_allocator_strategy=auto_growth.");
 namespace paddle {
 namespace memory {

paddle/fluid/memory/allocation/naive_best_fit_allocator.cc
@@ -34,12 +34,13 @@
 #include "paddle/fluid/platform/xpu/xpu_header.h"
 #endif
-DEFINE_bool(init_allocated_mem, false,
-            "It is a mistake that the values of the memory allocated by "
-            "BuddyAllocator are always zeroed in some op's implementation. "
-            "To find this error in time, we use init_allocated_mem to indicate "
-            "that initializing the allocated memory with a small value "
-            "during unit testing.");
+PADDLE_DEFINE_EXPORTED_bool(
+    init_allocated_mem, false,
+    "It is a mistake that the values of the memory allocated by "
+    "BuddyAllocator are always zeroed in some op's implementation. "
+    "To find this error in time, we use init_allocated_mem to indicate "
+    "that initializing the allocated memory with a small value "
+    "during unit testing.");
 DECLARE_double(fraction_of_gpu_memory_to_use);
 DECLARE_uint64(initial_gpu_memory_in_mb);
 DECLARE_uint64(reallocate_gpu_memory_in_mb);

paddle/fluid/operators/pscore/heter_listen_and_serv_op.cc
@@ -15,7 +15,8 @@ limitations under the License. */
 #include "paddle/fluid/operators/pscore/heter_listen_and_serv_op.h"
 #include "paddle/fluid/framework/op_registry.h"
-DEFINE_int32(rpc_send_thread_num, 12, "number of threads for rpc send");
+PADDLE_DEFINE_EXPORTED_int32(rpc_send_thread_num, 12,
+                             "number of threads for rpc send");
 namespace paddle {
 namespace operators {

paddle/fluid/platform/CMakeLists.txt
@@ -37,13 +37,13 @@ if (WITH_PYTHON)
     endif(NOT WIN32)
 endif()
-cc_library(flags SRCS flags.cc DEPS gflags)
+cc_library(flags SRCS flags.cc DEPS gflags boost)
 cc_library(denormal SRCS denormal.cc DEPS)
 cc_library(errors SRCS errors.cc DEPS error_codes_proto)
 cc_test(errors_test SRCS errors_test.cc DEPS errors enforce)
-set(enforce_deps flags errors boost)
+set(enforce_deps flags errors boost flags)
 if(WITH_GPU)
     set(enforce_deps ${enforce_deps} external_error_proto)
 endif()

paddle/fluid/platform/cpu_info.cc
@@ -31,7 +31,7 @@ limitations under the License. */
 #endif  // _WIN32
 #include <algorithm>
-#include "gflags/gflags.h"
+#include "paddle/fluid/platform/flags.h"
 DECLARE_double(fraction_of_cpu_memory_to_use);
 DECLARE_uint64(initial_cpu_memory_in_mb);
@@ -42,7 +42,8 @@ DECLARE_double(fraction_of_cuda_pinned_memory_to_use);
 // between host and device. Allocates too much would reduce the amount
 // of memory available to the system for paging. So, by default, we
 // should set false to use_pinned_memory.
-DEFINE_bool(use_pinned_memory, true, "If set, allocate cpu pinned memory.");
+PADDLE_DEFINE_EXPORTED_bool(use_pinned_memory, true,
+                            "If set, allocate cpu pinned memory.");
 namespace paddle {
 namespace platform {
@@ -54,7 +55,9 @@ size_t CpuTotalPhysicalMemory() {
   mib[1] = HW_MEMSIZE;
   int64_t size = 0;
   size_t len = sizeof(size);
-  if (sysctl(mib, 2, &size, &len, NULL, 0) == 0) return (size_t)size;
+  if (sysctl(mib, 2, &size, &len, NULL, 0) == 0) {
+    return static_cast<size_t>(size);
+  }
   return 0L;
 #elif defined(_WIN32)
   MEMORYSTATUSEX sMeminfo;

paddle/fluid/platform/enforce.h
@@ -101,6 +101,7 @@ limitations under the License. */
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
 #include "paddle/fluid/platform/type_defs.h"
 #endif
+#include "paddle/fluid/platform/flags.h"
 namespace paddle {
 namespace platform {

paddle/fluid/platform/flags.cc
(This diff is collapsed on the commit page; expand it there to view the changes.)

paddle/fluid/platform/flags.h (new file, mode 100644)
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <cstdint>
#include <map>
#include <string>
#include <type_traits>
#include <typeindex>
#include "boost/variant.hpp"
#include "gflags/gflags.h"
#include "paddle/fluid/platform/macros.h"

namespace paddle {
namespace platform {

struct FlagInfo {
  using ValueType =
      boost::variant<bool, int32_t, int64_t, uint64_t, double, std::string>;
  std::string name;
  void *value_ptr;
  ValueType default_value;
  std::string doc;
  bool is_writable;
};

using ExportedFlagInfoMap = std::map<std::string, FlagInfo>;
const ExportedFlagInfoMap &GetExportedFlagInfoMap();

#define __PADDLE_DEFINE_EXPORTED_FLAG(__name, __is_writable, __cpp_type,     \
                                      __gflag_type, __default_value, __doc)  \
  DEFINE_##__gflag_type(__name, __default_value, __doc);                     \
  struct __PaddleRegisterFlag_##__name {                                     \
    __PaddleRegisterFlag_##__name() {                                        \
      const auto &instance = ::paddle::platform::GetExportedFlagInfoMap();   \
      using Type = ::paddle::platform::ExportedFlagInfoMap;                  \
      auto &info = const_cast<Type &>(instance)[#__name];                    \
      info.name = #__name;                                                   \
      info.value_ptr = &(FLAGS_##__name);                                    \
      info.default_value = static_cast<__cpp_type>(__default_value);         \
      info.doc = __doc;                                                      \
      info.is_writable = __is_writable;                                      \
    }                                                                        \
  };                                                                         \
  static_assert(std::is_same<__PaddleRegisterFlag_##__name,                  \
                             ::__PaddleRegisterFlag_##__name>::value,        \
                "FLAGS should define in global namespace");                  \
  static __PaddleRegisterFlag_##__name __PaddleRegisterFlag_instance##__name

#define PADDLE_DEFINE_EXPORTED_bool(name, default_value, doc) \
  __PADDLE_DEFINE_EXPORTED_FLAG(name, true, bool, bool, default_value, doc)
#define PADDLE_DEFINE_READONLY_EXPORTED_bool(name, default_value, doc) \
  __PADDLE_DEFINE_EXPORTED_FLAG(name, false, bool, bool, default_value, doc)
#define PADDLE_DEFINE_EXPORTED_int32(name, default_value, doc) \
  __PADDLE_DEFINE_EXPORTED_FLAG(name, true, int32_t, int32, default_value, doc)
#define PADDLE_DEFINE_EXPORTED_int64(name, default_value, doc) \
  __PADDLE_DEFINE_EXPORTED_FLAG(name, true, int64_t, int64, default_value, doc)
#define PADDLE_DEFINE_EXPORTED_uint64(name, default_value, doc) \
  __PADDLE_DEFINE_EXPORTED_FLAG(name, true, uint64_t, uint64, default_value, \
                                doc)
#define PADDLE_DEFINE_EXPORTED_double(name, default_value, doc) \
  __PADDLE_DEFINE_EXPORTED_FLAG(name, true, double, double, default_value, doc)
#define PADDLE_DEFINE_EXPORTED_string(name, default_value, doc) \
  __PADDLE_DEFINE_EXPORTED_FLAG(name, true, ::std::string, string, \
                                default_value, doc)

}  // namespace platform
}  // namespace paddle
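
The header above is the core of the commit: each PADDLE_DEFINE_EXPORTED_<type> expands to the usual gflags DEFINE_<type> plus a static registrar struct whose constructor stores a FlagInfo record (name, value pointer, default value, doc string, writability) in the map returned by GetExportedFlagInfoMap(), presumably defined in the collapsed flags.cc above. Below is a minimal, self-contained sketch of the same static-registrar idiom; everything in it (ToyFlagInfo, ToyFlagMap, TOY_DEFINE_EXPORTED_bool, demo_flag) is hypothetical and only illustrates the pattern, not Paddle's actual API:

// toy_flag_registry.cc -- standalone illustration of the registrar idiom.
#include <iostream>
#include <map>
#include <string>

struct ToyFlagInfo {
  void *value_ptr;     // address of the flag variable
  bool default_value;  // default (bool-only in this toy version)
  std::string doc;     // help text
};

// Function-local static: one map per process, created on first use.
std::map<std::string, ToyFlagInfo> &ToyFlagMap() {
  static std::map<std::string, ToyFlagInfo> instance;
  return instance;
}

// Defines the flag variable plus a static registrar whose constructor
// records the flag in ToyFlagMap() before main() runs.
#define TOY_DEFINE_EXPORTED_bool(name, default_value, doc)                  \
  bool FLAGS_##name = default_value;                                        \
  struct ToyRegisterFlag_##name {                                           \
    ToyRegisterFlag_##name() {                                              \
      ToyFlagMap()[#name] = ToyFlagInfo{&FLAGS_##name, default_value, doc}; \
    }                                                                       \
  };                                                                        \
  static ToyRegisterFlag_##name toy_register_flag_instance_##name

TOY_DEFINE_EXPORTED_bool(demo_flag, false, "A demo flag for the sketch.");

int main() {
  // Generic code can discover and mutate the flag without naming it.
  for (auto &pair : ToyFlagMap()) {
    std::cout << "FLAGS_" << pair.first << " doc: " << pair.second.doc << "\n";
    *static_cast<bool *>(pair.second.value_ptr) = true;  // flip via the map
  }
  std::cout << "FLAGS_demo_flag is now " << std::boolalpha << FLAGS_demo_flag
            << "\n";
  return 0;
}

Because each registrar runs during static initialization, generic code can enumerate and mutate every exported flag through the map, which is exactly what the pybind and Python changes later in this commit rely on.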
paddle/fluid/platform/init.cc
@@ -43,9 +43,10 @@ limitations under the License. */
 #endif
 DECLARE_int32(paddle_num_threads);
-DEFINE_int32(multiple_of_cupti_buffer_size, 1,
-             "Multiple of the CUPTI device buffer size. If the timestamps have "
-             "been dropped when you are profiling, try increasing this value.");
+PADDLE_DEFINE_EXPORTED_int32(
+    multiple_of_cupti_buffer_size, 1,
+    "Multiple of the CUPTI device buffer size. If the timestamps have "
+    "been dropped when you are profiling, try increasing this value.");
 namespace paddle {
 namespace platform {

paddle/fluid/platform/place.cc
@@ -14,11 +14,12 @@ limitations under the License. */
 #include "paddle/fluid/platform/place.h"
-DEFINE_bool(benchmark, false,
-            "Doing memory benchmark. It will make deleting scope synchronized, "
-            "and add some memory usage logs."
-            "Default cuda is asynchronous device, set to True will"
-            "force op run in synchronous mode.");
+PADDLE_DEFINE_EXPORTED_bool(
+    benchmark, false,
+    "Doing memory benchmark. It will make deleting scope synchronized, "
+    "and add some memory usage logs."
+    "Default cuda is asynchronous device, set to True will"
+    "force op run in synchronous mode.");
 namespace paddle {
 namespace platform {

paddle/fluid/platform/profiler.cc
@@ -24,7 +24,8 @@ limitations under the License. */
 #include "paddle/fluid/platform/dynload/nvtx.h"
 #endif
-DEFINE_bool(enable_rpc_profiler, false, "Enable rpc profiler or not.");
+PADDLE_DEFINE_EXPORTED_bool(enable_rpc_profiler, false,
+                            "Enable rpc profiler or not.");
 namespace paddle {
 namespace platform {

paddle/fluid/platform/xpu/xpu_info.cc
@@ -18,14 +18,15 @@ limitations under the License. */
 #include "paddle/fluid/platform/xpu/xpu_header.h"
 #include "paddle/fluid/string/split.h"
-DEFINE_string(selected_xpus, "",
-              "A list of device ids separated by comma, like: 0,1,2,3. "
-              "This option is useful when doing multi process training and "
-              "each process have only one device (XPU). If you want to use "
-              "all visible devices, set this to empty string. NOTE: the "
-              "reason of doing this is that we want to use P2P communication"
-              "between XPU devices, use XPU_VISIBLE_DEVICES can only use"
-              "share-memory only.");
+PADDLE_DEFINE_EXPORTED_string(
+    selected_xpus, "",
+    "A list of device ids separated by comma, like: 0,1,2,3. "
+    "This option is useful when doing multi process training and "
+    "each process have only one device (XPU). If you want to use "
+    "all visible devices, set this to empty string. NOTE: the "
+    "reason of doing this is that we want to use P2P communication"
+    "between XPU devices, use XPU_VISIBLE_DEVICES can only use"
+    "share-memory only.");
 namespace paddle {
 namespace platform {

paddle/fluid/pybind/global_value_getter_setter.cc
@@ -29,82 +29,8 @@
 #include "paddle/fluid/platform/macros.h"
 #include "pybind11/stl.h"
-// data processing
+// NOTE: where is these 2 flags from?
 DECLARE_bool(use_mkldnn);
 DECLARE_string(tracer_mkldnn_ops_on);
 DECLARE_string(tracer_mkldnn_ops_off);
-// debug
-DECLARE_bool(check_nan_inf);
-DECLARE_bool(cpu_deterministic);
-DECLARE_bool(enable_rpc_profiler);
-DECLARE_int32(multiple_of_cupti_buffer_size);
-DECLARE_bool(reader_queue_speed_test_mode);
-DECLARE_int32(call_stack_level);
-DECLARE_bool(sort_sum_gradient);
-DECLARE_bool(check_kernel_launch);
-// device management
-DECLARE_int32(paddle_num_threads);
-// executor
-DECLARE_bool(enable_parallel_graph);
-DECLARE_string(pe_profile_fname);
-DECLARE_string(print_sub_graph_dir);
-DECLARE_bool(use_ngraph);
-// memory management
-DECLARE_string(allocator_strategy);
-DECLARE_double(eager_delete_tensor_gb);
-DECLARE_double(fraction_of_cpu_memory_to_use);
-DECLARE_bool(free_idle_chunk);
-DECLARE_bool(free_when_no_cache_hit);
-DECLARE_int32(fuse_parameter_groups_size);
-DECLARE_double(fuse_parameter_memory_size);
-DECLARE_bool(init_allocated_mem);
-DECLARE_uint64(initial_cpu_memory_in_mb);
-DECLARE_double(memory_fraction_of_eager_deletion);
-DECLARE_bool(use_pinned_memory);
-DECLARE_bool(use_system_allocator);
-// others
-DECLARE_bool(benchmark);
-DECLARE_int32(inner_op_parallelism);
-DECLARE_int32(max_inplace_grad_add);
-DECLARE_string(tracer_profile_fname);
-DECLARE_bool(apply_pass_to_program);
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-// cudnn
-DECLARE_uint64(conv_workspace_size_limit);
-DECLARE_bool(cudnn_batchnorm_spatial_persistent);
-DECLARE_bool(cudnn_deterministic);
-DECLARE_bool(cudnn_exhaustive_search);
-DECLARE_bool(conv2d_disable_cudnn);
-// data processing
-DECLARE_bool(enable_cublas_tensor_op_math);
-// device management
-DECLARE_string(selected_gpus);
-// memory management
-DECLARE_bool(eager_delete_scope);
-DECLARE_bool(fast_eager_deletion_mode);
-DECLARE_double(fraction_of_cuda_pinned_memory_to_use);
-DECLARE_double(fraction_of_gpu_memory_to_use);
-DECLARE_uint64(gpu_memory_limit_mb);
-DECLARE_uint64(initial_gpu_memory_in_mb);
-DECLARE_uint64(reallocate_gpu_memory_in_mb);
-// others
-DECLARE_bool(sync_nccl_allreduce);
-#endif
-#ifdef PADDLE_WITH_XPU
-// device management
-DECLARE_string(selected_xpus);
-#endif
-#ifdef PADDLE_WITH_ASCEND_CL
-// device management
-DECLARE_string(selected_npus);
-// set minmum loss scaling value
-DECLARE_int32(min_loss_scaling);
-#endif
 #ifdef PADDLE_WITH_DISTRIBUTE
-DECLARE_int32(rpc_send_thread_num);
 DECLARE_int32(rpc_get_thread_num);
 DECLARE_int32(rpc_prefetch_thread_num);
 #endif
@@ -181,7 +107,6 @@ class PYBIND11_HIDDEN GlobalVarGetterSetterRegistry {
     PADDLE_ENFORCE_NOT_NULL(setter,
                             platform::errors::InvalidArgument(
                                 "Setter of %s should not be null", name));
     var_infos_.insert({name, VarInfo(is_public, getter, setter)});
   }
@@ -243,81 +168,6 @@ class PYBIND11_HIDDEN GlobalVarGetterSetterRegistry {
 GlobalVarGetterSetterRegistry GlobalVarGetterSetterRegistry::instance_;
-class GlobalVarGetterSetterRegistryHelper {
- public:
-  GlobalVarGetterSetterRegistryHelper(bool is_public, bool is_writable,
-                                      const std::string &var_names)
-      : is_public_(is_public),
-        is_writable_(is_writable),
-        var_names_(SplitVarNames(var_names)) {}
-
-  template <typename... Args>
-  void Register(Args &&... args) const {
-    Impl<0, sizeof...(args) == 1, Args...>::Register(
-        is_public_, is_writable_, var_names_, std::forward<Args>(args)...);
-  }
-
- private:
-  static std::vector<std::string> SplitVarNames(const std::string &names) {
-    auto valid_char = [](char ch) { return !std::isspace(ch) && ch != ','; };
-    std::vector<std::string> ret;
-    size_t i = 0, j = 0, n = names.size();
-    while (i < n) {
-      for (; i < n && !valid_char(names[i]); ++i) {
-      }
-      for (j = i + 1; j < n && valid_char(names[j]); ++j) {
-      }
-      if (i < n && j <= n) {
-        auto substring = names.substr(i, j - i);
-        VLOG(10) << "Get substring: \"" << substring << "\"";
-        ret.emplace_back(substring);
-      }
-      i = j + 1;
-    }
-    return ret;
-  }
-
- private:
-  template <size_t kIdx, bool kIsStop, typename T, typename... Args>
-  struct Impl {
-    static void Register(bool is_public, bool is_writable,
-                         const std::vector<std::string> &var_names, T &&var,
-                         Args &&... args) {
-      PADDLE_ENFORCE_EQ(kIdx + 1 + sizeof...(args), var_names.size(),
-                        platform::errors::InvalidArgument(
-                            "Argument number not match name number"));
-      Impl<kIdx, true, T>::Register(is_public, is_writable, var_names, var);
-      Impl<kIdx + 1, sizeof...(Args) == 1, Args...>::Register(
-          is_public, is_writable, var_names, std::forward<Args>(args)...);
-    }
-  };
-
-  template <size_t kIdx, typename T>
-  struct Impl<kIdx, true, T> {
-    static void Register(bool is_public, bool is_writable,
-                         const std::vector<std::string> &var_names, T &&var) {
-      auto *instance = GlobalVarGetterSetterRegistry::MutableInstance();
-      if (is_writable) {
-        instance->Register(
-            var_names[kIdx], is_public,
-            GlobalVarGetterSetterRegistry::CreateGetter(std::forward<T>(var)),
-            GlobalVarGetterSetterRegistry::CreateSetter(&var));
-      } else {
-        instance->Register(
-            var_names[kIdx], is_public,
-            GlobalVarGetterSetterRegistry::CreateGetter(std::forward<T>(var)));
-      }
-    }
-  };
-
- private:
-  const bool is_public_;
-  const bool is_writable_;
-  const std::vector<std::string> var_names_;
-};
-
 static void RegisterGlobalVarGetterSetter();
 void BindGlobalValueGetterSetter(pybind11::module *module) {
@@ -338,65 +188,51 @@ void BindGlobalValueGetterSetter(pybind11::module *module) {
 }
 /* Public vars are designed to be writable. */
-#define REGISTER_PUBLIC_GLOBAL_VAR(...)                                        \
-  do {                                                                         \
-    GlobalVarGetterSetterRegistryHelper(/*is_public=*/true,                    \
-                                        /*is_writable=*/true, "" #__VA_ARGS__) \
-        .Register(__VA_ARGS__);                                                \
-  } while (0)
-
-#define REGISTER_PRIVATE_GLOBAL_VAR(is_writable, ...)                      \
-  do {                                                                     \
-    GlobalVarGetterSetterRegistryHelper(/*is_public=*/false, is_writable,  \
-                                        "" #__VA_ARGS__)                   \
-        .Register(__VA_ARGS__);                                            \
-  } while (0)
-
-static void RegisterGlobalVarGetterSetter() {
-  REGISTER_PRIVATE_GLOBAL_VAR(/*is_writable=*/false, FLAGS_free_idle_chunk,
-                              FLAGS_free_when_no_cache_hit);
-
-  REGISTER_PUBLIC_GLOBAL_VAR(
-      FLAGS_eager_delete_tensor_gb, FLAGS_enable_parallel_graph,
-      FLAGS_allocator_strategy, FLAGS_use_system_allocator, FLAGS_check_nan_inf,
-      FLAGS_call_stack_level, FLAGS_sort_sum_gradient, FLAGS_cpu_deterministic,
-      FLAGS_enable_rpc_profiler, FLAGS_multiple_of_cupti_buffer_size,
-      FLAGS_reader_queue_speed_test_mode, FLAGS_pe_profile_fname,
-      FLAGS_print_sub_graph_dir, FLAGS_fraction_of_cpu_memory_to_use,
-      FLAGS_fuse_parameter_groups_size, FLAGS_fuse_parameter_memory_size,
-      FLAGS_init_allocated_mem, FLAGS_initial_cpu_memory_in_mb,
-      FLAGS_memory_fraction_of_eager_deletion, FLAGS_use_pinned_memory,
-      FLAGS_benchmark, FLAGS_inner_op_parallelism, FLAGS_tracer_profile_fname,
-      FLAGS_paddle_num_threads, FLAGS_use_mkldnn, FLAGS_max_inplace_grad_add,
-      FLAGS_tracer_mkldnn_ops_on, FLAGS_tracer_mkldnn_ops_off,
-      FLAGS_apply_pass_to_program);
-
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-  REGISTER_PUBLIC_GLOBAL_VAR(
-      FLAGS_gpu_memory_limit_mb, FLAGS_cudnn_deterministic,
-      FLAGS_conv_workspace_size_limit, FLAGS_cudnn_batchnorm_spatial_persistent,
-      FLAGS_cudnn_exhaustive_search, FLAGS_eager_delete_scope,
-      FLAGS_fast_eager_deletion_mode,
-      FLAGS_fraction_of_cuda_pinned_memory_to_use,
-      FLAGS_fraction_of_gpu_memory_to_use, FLAGS_initial_gpu_memory_in_mb,
-      FLAGS_reallocate_gpu_memory_in_mb, FLAGS_enable_cublas_tensor_op_math,
-      FLAGS_selected_gpus, FLAGS_sync_nccl_allreduce,
-      FLAGS_conv2d_disable_cudnn, FLAGS_check_kernel_launch);
-#endif
-#ifdef PADDLE_WITH_XPU
-  REGISTER_PUBLIC_GLOBAL_VAR(FLAGS_selected_xpus);
-#endif
-#ifdef PADDLE_WITH_ASCEND_CL
-  REGISTER_PUBLIC_GLOBAL_VAR(FLAGS_selected_npus);
-  REGISTER_PUBLIC_GLOBAL_VAR(FLAGS_min_loss_scaling);
-#endif
+#define REGISTER_PUBLIC_GLOBAL_VAR(var)                                    \
+  do {                                                                     \
+    auto *instance = GlobalVarGetterSetterRegistry::MutableInstance();     \
+    instance->Register(#var, /*is_public=*/true,                           \
+                       GlobalVarGetterSetterRegistry::CreateGetter(var),   \
+                       GlobalVarGetterSetterRegistry::CreateSetter(&var)); \
+  } while (0)
+
+struct RegisterGetterSetterVisitor : public boost::static_visitor<void> {
+  RegisterGetterSetterVisitor(const std::string &name, bool is_public,
+                              void *value_ptr)
+      : name_(name), is_public_(is_public), value_ptr_(value_ptr) {}
+
+  template <typename T>
+  void operator()(const T &) const {
+    auto &value = *static_cast<T *>(value_ptr_);
+    auto *instance = GlobalVarGetterSetterRegistry::MutableInstance();
+    instance->Register(name_, is_public_,
+                       GlobalVarGetterSetterRegistry::CreateGetter(value),
+                       GlobalVarGetterSetterRegistry::CreateSetter(&value));
+  }
+
+ private:
+  std::string name_;
+  bool is_public_;
+  void *value_ptr_;
+};
 
+static void RegisterGlobalVarGetterSetter() {
 #ifdef PADDLE_WITH_DITRIBUTE
-  REGISTER_PUBLIC_GLOBAL_VAR(FLAGS_rpc_send_thread_num,
-                             FLAGS_rpc_get_thread_num,
-                             FLAGS_rpc_prefetch_thread_num);
+  REGISTER_PUBLIC_GLOBAL_VAR(FLAGS_rpc_get_thread_num);
+  REGISTER_PUBLIC_GLOBAL_VAR(FLAGS_rpc_prefetch_thread_num);
 #endif
+
+  const auto &flag_map = platform::GetExportedFlagInfoMap();
+  for (const auto &pair : flag_map) {
+    const std::string &name = pair.second.name;
+    bool is_writable = pair.second.is_writable;
+    void *value_ptr = const_cast<void *>(pair.second.value_ptr);
+    const auto &default_value = pair.second.default_value;
+    RegisterGetterSetterVisitor visitor("FLAGS_" + name, is_writable,
+                                        value_ptr);
+    boost::apply_visitor(visitor, default_value);
+  }
+}
 }  // namespace pybind
 }  // namespace paddle
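
The net effect of this file's diff: instead of hand-maintaining DECLARE_* lists and REGISTER_PUBLIC_GLOBAL_VAR calls for every flag, the pybind layer now walks platform::GetExportedFlagInfoMap() and applies a visitor to each boost::variant default value to recover the flag's static type before registering its getter and setter. The standalone sketch below shows that variant-plus-visitor dispatch using std::variant/std::visit instead of the boost equivalents used in the commit; the names (FlagValue, FlagRecord, PrintVisitor, flag_a, flag_b) are made up for the illustration:

// visitor_sketch.cc -- standalone illustration of variant-driven dispatch.
#include <cstdint>
#include <iostream>
#include <map>
#include <string>
#include <variant>

using FlagValue = std::variant<bool, int32_t, double, std::string>;

struct FlagRecord {
  void *value_ptr;          // type-erased pointer to the real flag variable
  FlagValue default_value;  // carries the flag's static type
};

// The alternative held by default_value tells us how to cast value_ptr back.
struct PrintVisitor {
  const std::string &name;
  void *value_ptr;

  template <typename T>
  void operator()(const T &) const {
    const T &current = *static_cast<const T *>(value_ptr);
    std::cout << name << " = " << current << "\n";
  }
};

int main() {
  bool flag_a = true;
  double flag_b = 0.25;
  std::map<std::string, FlagRecord> flags{
      {"flag_a", {&flag_a, false}},
      {"flag_b", {&flag_b, 0.0}},
  };
  for (const auto &pair : flags) {
    std::visit(PrintVisitor{pair.first, pair.second.value_ptr},
               pair.second.default_value);
  }
  return 0;
}

In the commit the analogous call is boost::apply_visitor(visitor, pair.second.default_value), which instantiates RegisterGetterSetterVisitor::operator()<T> for the variant's current alternative and casts value_ptr back to T* accordingly.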
paddle/fluid/pybind/reader_py.cc
@@ -32,9 +32,10 @@
 #include "paddle/fluid/platform/place.h"
 #include "pybind11/stl.h"
-DEFINE_bool(reader_queue_speed_test_mode, false,
-            "If set true, the queue.pop will only get data from queue but not "
-            "remove the data from queue for speed testing");
+PADDLE_DEFINE_EXPORTED_bool(
+    reader_queue_speed_test_mode, false,
+    "If set true, the queue.pop will only get data from queue but not "
+    "remove the data from queue for speed testing");
 namespace paddle {
 namespace pybind {

python/paddle/fluid/__init__.py
@@ -176,83 +176,23 @@ def __bootstrap__():
         print('PLEASE USE OMP_NUM_THREADS WISELY.', file=sys.stderr)
     os.environ['OMP_NUM_THREADS'] = str(num_threads)
-    sysstr = platform.system()
+    flag_prefix = "FLAGS_"
     read_env_flags = [
-        'check_nan_inf',
-        'convert_all_blocks',
-        'benchmark',
-        'eager_delete_scope',
-        'fraction_of_cpu_memory_to_use',
-        'initial_cpu_memory_in_mb',
-        'init_allocated_mem',
-        'paddle_num_threads',
-        'dist_threadpool_size',
-        'eager_delete_tensor_gb',
-        'fast_eager_deletion_mode',
-        'memory_fraction_of_eager_deletion',
-        'allocator_strategy',
-        'reader_queue_speed_test_mode',
-        'print_sub_graph_dir',
-        'pe_profile_fname',
-        'inner_op_parallelism',
-        'enable_parallel_graph',
-        'fuse_parameter_groups_size',
-        'multiple_of_cupti_buffer_size',
-        'fuse_parameter_memory_size',
-        'tracer_profile_fname',
-        'dygraph_debug',
-        'use_system_allocator',
-        'enable_unused_var_check',
-        'free_idle_chunk',
-        'free_when_no_cache_hit',
-        'call_stack_level',
-        'sort_sum_gradient',
-        'max_inplace_grad_add',
-        'apply_pass_to_program',
-        'new_executor_use_inplace',
+        key[len(flag_prefix):] for key in core.globals().keys()
+        if key.startswith(flag_prefix)
     ]
-    if 'Darwin' not in sysstr:
-        read_env_flags.append('use_pinned_memory')
-
-    if os.name != 'nt':
-        read_env_flags.append('cpu_deterministic')
-
-    if core.is_compiled_with_mkldnn():
-        read_env_flags.append('use_mkldnn')
-        read_env_flags.append('tracer_mkldnn_ops_on')
-        read_env_flags.append('tracer_mkldnn_ops_off')
-
-    if core.is_compiled_with_cuda():
-        read_env_flags += [
-            'fraction_of_gpu_memory_to_use',
-            'initial_gpu_memory_in_mb',
-            'reallocate_gpu_memory_in_mb',
-            'cudnn_deterministic',
-            'enable_cublas_tensor_op_math',
-            'conv_workspace_size_limit',
-            'cudnn_exhaustive_search',
-            'selected_gpus',
-            'sync_nccl_allreduce',
-            'cudnn_batchnorm_spatial_persistent',
-            'gpu_allocator_retry_time',
-            'local_exe_sub_scope_limit',
-            'gpu_memory_limit_mb',
-            'conv2d_disable_cudnn',
-            'get_host_by_name_time',
-        ]
-
-    if core.is_compiled_with_npu():
-        read_env_flags += [
-            'selected_npus',
-            'fraction_of_gpu_memory_to_use',
-            'initial_gpu_memory_in_mb',
-            'reallocate_gpu_memory_in_mb',
-            'gpu_memory_limit_mb',
-            'npu_config_path',
-            'get_host_by_name_time',
-            'hccl_check_nan',
-            'min_loss_scaling',
-        ]
+
+    def remove_flag_if_exists(name):
+        if name in read_env_flags:
+            read_env_flags.remove(name)
+
+    sysstr = platform.system()
+    if 'Darwin' in sysstr:
+        remove_flag_if_exists('use_pinned_memory')
+
+    if os.name == 'nt':
+        remove_flag_if_exists('cpu_deterministic')
 
     core.init_gflags(["--tryfromenv=" + ",".join(read_env_flags)])
     # Note(zhouwei25): sys may not have argv in some cases,