Commit ca0136a6
Authored on Sep 16, 2021 by sneaxiy
Parent: e93c18a3

make flag setter easier

Showing 28 changed files with 375 additions and 462 deletions (+375, -462)
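Nearly every C++ change below is the same mechanical substitution: a plain gflags `DEFINE_<type>` becomes `PADDLE_DEFINE_EXPORTED_<type>` (or `PADDLE_DEFINE_READONLY_EXPORTED_<type>`) from the new header `paddle/fluid/platform/flags.h`, which defines the gflag and also registers it in a global `ExportedFlagInfoMap` so the Python binding can discover flags automatically instead of relying on hand-maintained `DECLARE_*` lists. The CMake changes just wire up the `flags` library dependency, and `#include "gflags/gflags.h"` becomes `#include "paddle/fluid/platform/flags.h"` at definition sites. A minimal before/after sketch of the pattern, using the flag from the first file below:

```cpp
#include "paddle/fluid/platform/flags.h"

// Before: a plain gflag, invisible to the exported-flag registry.
// DEFINE_bool(cpu_deterministic, false, "...");

// After: the same gflag, plus a static registrar object that records the
// flag's name, address, default value, doc string, and writability in
// paddle::platform::GetExportedFlagInfoMap() at program startup.
PADDLE_DEFINE_EXPORTED_bool(
    cpu_deterministic, false,
    "Whether to make the result of computation deterministic in CPU side.");
```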
paddle/fluid/framework/details/reduce_op_handle.cc (+1, -1)
paddle/fluid/framework/ir/coalesce_grad_tensor_pass.cc (+8, -7)
paddle/fluid/framework/ir/graph.cc (+2, -2)
paddle/fluid/framework/ir/graph_helper.cc (+3, -3)
paddle/fluid/framework/operator.cc (+2, -1)
paddle/fluid/framework/parallel_executor.cc (+7, -5)
paddle/fluid/framework/scope.cc (+1, -1)
paddle/fluid/framework/unused_var_check.cc (+5, -4)
paddle/fluid/imperative/CMakeLists.txt (+1, -1)
paddle/fluid/imperative/flags.cc (+4, -4)
paddle/fluid/imperative/profiler.cc (+2, -2)
paddle/fluid/memory/allocation/CMakeLists.txt (+1, -1)
paddle/fluid/memory/allocation/allocator_facade.cc (+5, -4)
paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc (+15, -12)
paddle/fluid/memory/allocation/naive_best_fit_allocator.cc (+7, -6)
paddle/fluid/operators/pscore/heter_listen_and_serv_op.cc (+2, -1)
paddle/fluid/platform/CMakeLists.txt (+2, -2)
paddle/fluid/platform/cpu_info.cc (+6, -3)
paddle/fluid/platform/enforce.h (+1, -0)
paddle/fluid/platform/flags.cc (+139, -107)
paddle/fluid/platform/flags.h (+85, -0)
paddle/fluid/platform/init.cc (+4, -3)
paddle/fluid/platform/place.cc (+6, -5)
paddle/fluid/platform/profiler.cc (+2, -1)
paddle/fluid/platform/xpu/xpu_info.cc (+9, -8)
paddle/fluid/pybind/global_value_getter_setter.cc (+39, -203)
paddle/fluid/pybind/reader_py.cc (+4, -3)
python/paddle/fluid/__init__.py (+12, -72)
paddle/fluid/framework/details/reduce_op_handle.cc
```diff
@@ -19,7 +19,7 @@
 #include "paddle/fluid/framework/details/variable_visitor.h"
 #include "paddle/fluid/platform/profiler.h"
-DEFINE_bool(cpu_deterministic, false,
-            "Whether to make the result of computation deterministic in CPU side.");
+PADDLE_DEFINE_EXPORTED_bool(
+    cpu_deterministic, false,
+    "Whether to make the result of computation deterministic in CPU side.");
```
paddle/fluid/framework/ir/coalesce_grad_tensor_pass.cc
```diff
@@ -25,13 +25,14 @@ class VarDesc;
 }  // namespace framework
 }  // namespace paddle
-DEFINE_double(fuse_parameter_memory_size, -1.0,  // MBytes
-              "fuse_parameter_memory_size is up limited memory size(MB)"
-              "of one group parameters' gradient which is the input "
-              "of communication calling(e.g NCCLAllReduce). "
-              "The default value is 0, it means that "
-              "not set group according to memory_size.");
-DEFINE_int32(fuse_parameter_groups_size, 1,
+PADDLE_DEFINE_EXPORTED_double(
+    fuse_parameter_memory_size, -1.0,  // MBytes
+    "fuse_parameter_memory_size is up limited memory size(MB)"
+    "of one group parameters' gradient which is the input "
+    "of communication calling(e.g NCCLAllReduce). "
+    "The default value is 0, it means that "
+    "not set group according to memory_size.");
+PADDLE_DEFINE_EXPORTED_int32(
+    fuse_parameter_groups_size, 1,
     "fuse_parameter_groups_size is the up limited size of one group "
     "parameters' gradient. "
```
paddle/fluid/framework/ir/graph.cc
```diff
@@ -17,8 +17,8 @@ limitations under the License. */
 #include "paddle/fluid/framework/ir/graph.h"
 #include "paddle/fluid/framework/operator.h"
-DEFINE_bool(convert_all_blocks, true,
-            "Convert all blocks in program into SSAgraphs");
+PADDLE_DEFINE_EXPORTED_bool(convert_all_blocks, true,
+                            "Convert all blocks in program into SSAgraphs");
 
 namespace paddle {
 namespace framework {
```
paddle/fluid/framework/ir/graph_helper.cc
```diff
@@ -18,9 +18,9 @@ limitations under the License. */
 #include "paddle/fluid/framework/op_proto_maker.h"
 DECLARE_bool(convert_all_blocks);
-DEFINE_string(print_sub_graph_dir, "",
-              "FLAGS_print_sub_graph_dir is used "
-              "to print the nodes of sub_graphs.");
+PADDLE_DEFINE_EXPORTED_string(print_sub_graph_dir, "",
+                              "FLAGS_print_sub_graph_dir is used "
+                              "to print the nodes of sub_graphs.");
 
 namespace paddle {
 namespace framework {
```
paddle/fluid/framework/operator.cc
```diff
@@ -47,7 +47,8 @@ class LoDTensor;
 DECLARE_bool(benchmark);
 DECLARE_bool(check_nan_inf);
 DECLARE_bool(enable_unused_var_check);
-DEFINE_int32(inner_op_parallelism, 0, "number of threads for inner op");
+PADDLE_DEFINE_EXPORTED_int32(inner_op_parallelism, 0,
+                             "number of threads for inner op");
 
 namespace paddle {
 namespace framework {
```
paddle/fluid/framework/parallel_executor.cc
```diff
@@ -46,11 +46,13 @@ DECLARE_double(eager_delete_tensor_gb);
 #ifdef WITH_GPERFTOOLS
 #include "gperftools/profiler.h"
 #endif
-DEFINE_string(pe_profile_fname, "",
-              "Profiler filename for PE, which generated by gperftools."
-              "Only valid when compiled `WITH_PRIFILER=ON`. Empty if disable.");
-DEFINE_bool(enable_parallel_graph, false,
-            "Force disable parallel graph execution mode if set false.");
+PADDLE_DEFINE_EXPORTED_string(
+    pe_profile_fname, "",
+    "Profiler filename for PE, which generated by gperftools."
+    "Only valid when compiled `WITH_PRIFILER=ON`. Empty if disable.");
+PADDLE_DEFINE_EXPORTED_bool(
+    enable_parallel_graph, false,
+    "Force disable parallel graph execution mode if set false.");
 
 namespace paddle {
 namespace framework {
```
paddle/fluid/framework/scope.cc
```diff
@@ -19,7 +19,7 @@ limitations under the License. */
 DECLARE_bool(benchmark);
-DEFINE_bool(eager_delete_scope, true,
-            "Delete local scope eagerly. It will reduce GPU memory usage but "
-            "slow down the destruction of variables.(around 1% performance harm)");
+PADDLE_DEFINE_EXPORTED_bool(
+    eager_delete_scope, true,
+    "Delete local scope eagerly. It will reduce GPU memory usage but "
+    "slow down the destruction of variables.(around 1% performance harm)");
```
paddle/fluid/framework/unused_var_check.cc
```diff
@@ -17,15 +17,16 @@ limitations under the License. */
 #include <glog/logging.h>
 #include <string>
-#include "gflags/gflags.h"
 #include "paddle/fluid/framework/no_need_buffer_vars_inference.h"
 #include "paddle/fluid/framework/op_info.h"
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/platform/enforce.h"
+#include "paddle/fluid/platform/flags.h"
-DEFINE_bool(enable_unused_var_check, false,
-            "Checking whether operator contains unused inputs, "
-            "especially for grad operator. It should be in unittest.");
+PADDLE_DEFINE_EXPORTED_bool(
+    enable_unused_var_check, false,
+    "Checking whether operator contains unused inputs, "
+    "especially for grad operator. It should be in unittest.");
 
 namespace paddle {
 namespace framework {
```
paddle/fluid/imperative/CMakeLists.txt
```diff
@@ -11,7 +11,7 @@ cc_library(amp SRCS amp_auto_cast.cc DEPS layer )
 cc_library(tracer SRCS tracer.cc DEPS layer engine program_desc_tracer amp denormal)
 cc_library(basic_engine SRCS basic_engine.cc DEPS layer gradient_accumulator)
 cc_library(engine SRCS basic_engine.cc partial_grad_engine.cc DEPS layer gradient_accumulator)
-cc_library(imperative_profiler SRCS profiler.cc)
+cc_library(imperative_profiler SRCS profiler.cc DEPS flags)
 if(NOT WIN32)
     if(WITH_NCCL OR WITH_RCCL)
         cc_library(imperative_all_reduce SRCS all_reduce.cc DEPS collective_helper device_context selected_rows tensor)
```
paddle/fluid/imperative/flags.cc
```diff
@@ -13,11 +13,11 @@
 // limitations under the License.
 
 #include "paddle/fluid/imperative/flags.h"
-#include "gflags/gflags.h"
+#include "paddle/fluid/platform/flags.h"
-DEFINE_uint64(dygraph_debug, 0,
-              "Debug level of dygraph. This flag is not "
-              "open to users");
+PADDLE_DEFINE_EXPORTED_uint64(dygraph_debug, 0,
+                              "Debug level of dygraph. This flag is not "
+                              "open to users");
 
 namespace paddle {
 namespace imperative {
```
paddle/fluid/imperative/profiler.cc
```diff
@@ -19,9 +19,9 @@
 #endif
 #include <glog/logging.h>
 #include <mutex>  // NOLINT
-#include "gflags/gflags.h"
+#include "paddle/fluid/platform/flags.h"
-DEFINE_string(tracer_profile_fname, "xxgperf",
-              "Profiler filename for imperative tracer, which generated by gperftools."
-              "Only valid when compiled `WITH_PROFILER=ON`. Empty if disable.");
+PADDLE_DEFINE_EXPORTED_string(
+    tracer_profile_fname, "xxgperf",
+    "Profiler filename for imperative tracer, which generated by gperftools."
+    "Only valid when compiled `WITH_PROFILER=ON`. Empty if disable.");
```
paddle/fluid/memory/allocation/CMakeLists.txt
```diff
@@ -99,7 +99,7 @@ cc_test(allocator_facade_abs_flags_test SRCS allocator_facade_abs_flags_test.cc
 cc_test(allocator_facade_frac_flags_test SRCS allocator_facade_frac_flags_test.cc DEPS allocator_facade)
-cc_library(auto_growth_best_fit_allocator SRCS auto_growth_best_fit_allocator.cc DEPS allocator aligned_allocator)
+cc_library(auto_growth_best_fit_allocator SRCS auto_growth_best_fit_allocator.cc DEPS allocator aligned_allocator flags)
 cc_test(auto_growth_best_fit_allocator_facade_test SRCS auto_growth_best_fit_allocator_facade_test.cc DEPS cpu_allocator auto_growth_best_fit_allocator)
 cc_test(auto_growth_best_fit_allocator_test SRCS auto_growth_best_fit_allocator_test.cc DEPS auto_growth_best_fit_allocator)
```
paddle/fluid/memory/allocation/allocator_facade.cc
```diff
@@ -37,14 +37,15 @@
 #endif
 #include "paddle/fluid/platform/npu_info.h"
-DEFINE_int64(gpu_allocator_retry_time, 10000,
-             "The retry time (milliseconds) when allocator fails "
-             "to allocate memory. No retry if this value is not greater than 0");
-DEFINE_bool(use_system_allocator, false,
-            "Whether to use system allocator to allocate CPU and GPU memory. "
-            "Only used for unittests.");
+PADDLE_DEFINE_EXPORTED_int64(
+    gpu_allocator_retry_time, 10000,
+    "The retry time (milliseconds) when allocator fails "
+    "to allocate memory. No retry if this value is not greater than 0");
+PADDLE_DEFINE_EXPORTED_bool(
+    use_system_allocator, false,
+    "Whether to use system allocator to allocate CPU and GPU memory. "
+    "Only used for unittests.");
 
 namespace paddle {
 namespace memory {
```
paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc
```diff
@@ -17,18 +17,21 @@
 #include <algorithm>
 #include <mutex>  // NOLINT
 #include "paddle/fluid/memory/allocation/aligned_allocator.h"
+#include "paddle/fluid/platform/flags.h"
-DEFINE_bool(free_idle_chunk, false,
-            "Whether to free idle chunk when each allocation is freed. "
-            "If false, all freed allocation would be cached to speed up next "
-            "allocation request. If true, no allocation would be cached. This "
-            "flag only works when FLAGS_allocator_strategy=auto_growth.");
-DEFINE_bool(free_when_no_cache_hit, false,
-            "Whether to free idle chunks when no cache hit. If true, idle "
-            "chunk would be freed when no cache hit; if false, idle "
-            "chunk would be freed when out of memory occurs. This flag "
-            "only works when FLAGS_allocator_strategy=auto_growth.");
+PADDLE_DEFINE_READONLY_EXPORTED_bool(
+    free_idle_chunk, false,
+    "Whether to free idle chunk when each allocation is freed. "
+    "If false, all freed allocation would be cached to speed up next "
+    "allocation request. If true, no allocation would be cached. This "
+    "flag only works when FLAGS_allocator_strategy=auto_growth.");
+PADDLE_DEFINE_READONLY_EXPORTED_bool(
+    free_when_no_cache_hit, false,
+    "Whether to free idle chunks when no cache hit. If true, idle "
+    "chunk would be freed when no cache hit; if false, idle "
+    "chunk would be freed when out of memory occurs. This flag "
+    "only works when FLAGS_allocator_strategy=auto_growth.");
 
 namespace paddle {
 namespace memory {
```
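These two allocator flags are the only ones in the commit that use the `READONLY` variant: it expands the same way as `PADDLE_DEFINE_EXPORTED_bool` but records `is_writable = false` in the `FlagInfo`, which the pybind registration later in this diff passes through when exposing the flag (replacing the old `REGISTER_PRIVATE_GLOBAL_VAR(/*is_writable=*/false, ...)` call). A minimal sketch, assuming the `flags.h` introduced later in this diff; `demo_readonly_flag` is a hypothetical name:

```cpp
#include "paddle/fluid/platform/flags.h"

// Registers the flag with is_writable = false: it can still be set from the
// command line or environment at startup, but the exported-flag registry
// marks it as not writable afterwards.
PADDLE_DEFINE_READONLY_EXPORTED_bool(
    demo_readonly_flag, false,  // hypothetical flag, for illustration only
    "A flag whose value must stay fixed once the allocator is built.");
```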
paddle/fluid/memory/allocation/naive_best_fit_allocator.cc
```diff
@@ -34,12 +34,13 @@
 #include "paddle/fluid/platform/xpu/xpu_header.h"
 #endif
-DEFINE_bool(init_allocated_mem, false,
-            "It is a mistake that the values of the memory allocated by "
-            "BuddyAllocator are always zeroed in some op's implementation. "
-            "To find this error in time, we use init_allocated_mem to indicate "
-            "that initializing the allocated memory with a small value "
-            "during unit testing.");
+PADDLE_DEFINE_EXPORTED_bool(
+    init_allocated_mem, false,
+    "It is a mistake that the values of the memory allocated by "
+    "BuddyAllocator are always zeroed in some op's implementation. "
+    "To find this error in time, we use init_allocated_mem to indicate "
+    "that initializing the allocated memory with a small value "
+    "during unit testing.");
 DECLARE_double(fraction_of_gpu_memory_to_use);
 DECLARE_uint64(initial_gpu_memory_in_mb);
 DECLARE_uint64(reallocate_gpu_memory_in_mb);
```
paddle/fluid/operators/pscore/heter_listen_and_serv_op.cc
```diff
@@ -15,7 +15,8 @@ limitations under the License. */
 #include "paddle/fluid/operators/pscore/heter_listen_and_serv_op.h"
 #include "paddle/fluid/framework/op_registry.h"
-DEFINE_int32(rpc_send_thread_num, 12, "number of threads for rpc send");
+PADDLE_DEFINE_EXPORTED_int32(rpc_send_thread_num, 12,
+                             "number of threads for rpc send");
 
 namespace paddle {
 namespace operators {
```
paddle/fluid/platform/CMakeLists.txt
```diff
@@ -37,13 +37,13 @@ if (WITH_PYTHON)
     endif(NOT WIN32)
 endif()
 
-cc_library(flags SRCS flags.cc DEPS gflags)
+cc_library(flags SRCS flags.cc DEPS gflags boost)
 cc_library(denormal SRCS denormal.cc DEPS)
 cc_library(errors SRCS errors.cc DEPS error_codes_proto)
 cc_test(errors_test SRCS errors_test.cc DEPS errors enforce)
 
-set(enforce_deps flags errors boost)
+set(enforce_deps flags errors boost flags)
 if(WITH_GPU)
     set(enforce_deps ${enforce_deps} external_error_proto)
 endif()
```
paddle/fluid/platform/cpu_info.cc
```diff
@@ -31,7 +31,7 @@ limitations under the License. */
 #endif  // _WIN32
 
 #include <algorithm>
-#include "gflags/gflags.h"
+#include "paddle/fluid/platform/flags.h"
 
 DECLARE_double(fraction_of_cpu_memory_to_use);
 DECLARE_uint64(initial_cpu_memory_in_mb);
@@ -42,7 +42,8 @@ DECLARE_double(fraction_of_cuda_pinned_memory_to_use);
 // between host and device. Allocates too much would reduce the amount
 // of memory available to the system for paging. So, by default, we
 // should set false to use_pinned_memory.
-DEFINE_bool(use_pinned_memory, true, "If set, allocate cpu pinned memory.");
+PADDLE_DEFINE_EXPORTED_bool(use_pinned_memory, true,
+                            "If set, allocate cpu pinned memory.");
 
 namespace paddle {
 namespace platform {
@@ -54,7 +55,9 @@ size_t CpuTotalPhysicalMemory() {
   mib[1] = HW_MEMSIZE;
   int64_t size = 0;
   size_t len = sizeof(size);
-  if (sysctl(mib, 2, &size, &len, NULL, 0) == 0) return (size_t)size;
+  if (sysctl(mib, 2, &size, &len, NULL, 0) == 0) {
+    return static_cast<size_t>(size);
+  }
   return 0L;
 #elif defined(_WIN32)
   MEMORYSTATUSEX sMeminfo;
```
paddle/fluid/platform/enforce.h
```diff
@@ -101,6 +101,7 @@ limitations under the License. */
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
 #include "paddle/fluid/platform/type_defs.h"
 #endif
+#include "paddle/fluid/platform/flags.h"
 
 namespace paddle {
 namespace platform {
```
paddle/fluid/platform/flags.cc
(This diff was collapsed in the captured view; per the summary above it is +139, -107.)
paddle/fluid/platform/flags.h (new file, mode 100644)
```cpp
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <cstdint>
#include <map>
#include <string>
#include <type_traits>
#include <typeindex>
#include "boost/variant.hpp"
#include "gflags/gflags.h"
#include "paddle/fluid/platform/macros.h"

namespace paddle {
namespace platform {

struct FlagInfo {
  using ValueType =
      boost::variant<bool, int32_t, int64_t, uint64_t, double, std::string>;
  std::string name;
  void *value_ptr;
  ValueType default_value;
  std::string doc;
  bool is_writable;
};

using ExportedFlagInfoMap = std::map<std::string, FlagInfo>;
const ExportedFlagInfoMap &GetExportedFlagInfoMap();

#define __PADDLE_DEFINE_EXPORTED_FLAG(__name, __is_writable, __cpp_type,    \
                                      __gflag_type, __default_value, __doc) \
  DEFINE_##__gflag_type(__name, __default_value, __doc);                    \
  struct __PaddleRegisterFlag_##__name {                                    \
    __PaddleRegisterFlag_##__name() {                                       \
      const auto &instance = ::paddle::platform::GetExportedFlagInfoMap();  \
      using Type = ::paddle::platform::ExportedFlagInfoMap;                 \
      auto &info = const_cast<Type &>(instance)[#__name];                   \
      info.name = #__name;                                                  \
      info.value_ptr = &(FLAGS_##__name);                                   \
      info.default_value = static_cast<__cpp_type>(__default_value);        \
      info.doc = __doc;                                                     \
      info.is_writable = __is_writable;                                     \
    }                                                                       \
  };                                                                        \
  static_assert(std::is_same<__PaddleRegisterFlag_##__name,                 \
                             ::__PaddleRegisterFlag_##__name>::value,       \
                "FLAGS should define in global namespace");                 \
  static __PaddleRegisterFlag_##__name __PaddleRegisterFlag_instance##__name

#define PADDLE_DEFINE_EXPORTED_bool(name, default_value, doc) \
  __PADDLE_DEFINE_EXPORTED_FLAG(name, true, bool, bool, default_value, doc)
#define PADDLE_DEFINE_READONLY_EXPORTED_bool(name, default_value, doc) \
  __PADDLE_DEFINE_EXPORTED_FLAG(name, false, bool, bool, default_value, doc)
#define PADDLE_DEFINE_EXPORTED_int32(name, default_value, doc) \
  __PADDLE_DEFINE_EXPORTED_FLAG(name, true, int32_t, int32, default_value, doc)
#define PADDLE_DEFINE_EXPORTED_int64(name, default_value, doc) \
  __PADDLE_DEFINE_EXPORTED_FLAG(name, true, int64_t, int64, default_value, doc)
#define PADDLE_DEFINE_EXPORTED_uint64(name, default_value, doc)               \
  __PADDLE_DEFINE_EXPORTED_FLAG(name, true, uint64_t, uint64, default_value, \
                                doc)
#define PADDLE_DEFINE_EXPORTED_double(name, default_value, doc) \
  __PADDLE_DEFINE_EXPORTED_FLAG(name, true, double, double, default_value, doc)
#define PADDLE_DEFINE_EXPORTED_string(name, default_value, doc)      \
  __PADDLE_DEFINE_EXPORTED_FLAG(name, true, ::std::string, string, \
                                default_value, doc)

}  // namespace platform
}  // namespace paddle
```
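The heart of the commit is `__PADDLE_DEFINE_EXPORTED_FLAG`: it forwards to the ordinary gflags `DEFINE_##type` and then plants a static struct whose constructor runs before `main()` and inserts a `FlagInfo` (name, address of `FLAGS_<name>`, default value as a `boost::variant`, doc string, writability) into the shared map. The `static_assert` comparing `__PaddleRegisterFlag_##__name` with `::__PaddleRegisterFlag_##__name` is what forces the macro to be used at global namespace; otherwise the two names would denote different types. A minimal usage sketch, assuming the header above (`my_demo_flag` and the `main` are hypothetical, for illustration only):

```cpp
#include <iostream>
#include "paddle/fluid/platform/flags.h"

// Expands to DEFINE_int32(my_demo_flag, 42, "...") plus a file-local static
// registrar whose constructor records the flag in the exported-flag map.
PADDLE_DEFINE_EXPORTED_int32(my_demo_flag, 42, "A demo flag for illustration.");

int main() {
  // Every flag defined through the PADDLE_DEFINE_EXPORTED_* macros in the
  // linked binary shows up here, keyed by flag name.
  for (const auto &pair : paddle::platform::GetExportedFlagInfoMap()) {
    const auto &info = pair.second;
    std::cout << info.name << " (writable=" << info.is_writable
              << "): " << info.doc << std::endl;
  }
  return 0;
}
```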
paddle/fluid/platform/init.cc
```diff
@@ -43,9 +43,10 @@ limitations under the License. */
 #endif
 
 DECLARE_int32(paddle_num_threads);
-DEFINE_int32(multiple_of_cupti_buffer_size, 1,
-             "Multiple of the CUPTI device buffer size. If the timestamps have "
-             "been dropped when you are profiling, try increasing this value.");
+PADDLE_DEFINE_EXPORTED_int32(
+    multiple_of_cupti_buffer_size, 1,
+    "Multiple of the CUPTI device buffer size. If the timestamps have "
+    "been dropped when you are profiling, try increasing this value.");
 
 namespace paddle {
 namespace platform {
```
paddle/fluid/platform/place.cc
```diff
@@ -14,11 +14,12 @@ limitations under the License. */
 #include "paddle/fluid/platform/place.h"
-DEFINE_bool(benchmark, false,
-            "Doing memory benchmark. It will make deleting scope synchronized, "
-            "and add some memory usage logs."
-            "Default cuda is asynchronous device, set to True will"
-            "force op run in synchronous mode.");
+PADDLE_DEFINE_EXPORTED_bool(
+    benchmark, false,
+    "Doing memory benchmark. It will make deleting scope synchronized, "
+    "and add some memory usage logs."
+    "Default cuda is asynchronous device, set to True will"
+    "force op run in synchronous mode.");
 
 namespace paddle {
 namespace platform {
```
paddle/fluid/platform/profiler.cc
```diff
@@ -24,7 +24,8 @@ limitations under the License. */
 #include "paddle/fluid/platform/dynload/nvtx.h"
 #endif
-DEFINE_bool(enable_rpc_profiler, false, "Enable rpc profiler or not.");
+PADDLE_DEFINE_EXPORTED_bool(enable_rpc_profiler, false,
+                            "Enable rpc profiler or not.");
 
 namespace paddle {
 namespace platform {
```
paddle/fluid/platform/xpu/xpu_info.cc
```diff
@@ -18,14 +18,15 @@ limitations under the License. */
 #include "paddle/fluid/platform/xpu/xpu_header.h"
 #include "paddle/fluid/string/split.h"
-DEFINE_string(selected_xpus, "",
-              "A list of device ids separated by comma, like: 0,1,2,3. "
-              "This option is useful when doing multi process training and "
-              "each process have only one device (XPU). If you want to use "
-              "all visible devices, set this to empty string. NOTE: the "
-              "reason of doing this is that we want to use P2P communication"
-              "between XPU devices, use XPU_VISIBLE_DEVICES can only use"
-              "share-memory only.");
+PADDLE_DEFINE_EXPORTED_string(
+    selected_xpus, "",
+    "A list of device ids separated by comma, like: 0,1,2,3. "
+    "This option is useful when doing multi process training and "
+    "each process have only one device (XPU). If you want to use "
+    "all visible devices, set this to empty string. NOTE: the "
+    "reason of doing this is that we want to use P2P communication"
+    "between XPU devices, use XPU_VISIBLE_DEVICES can only use"
+    "share-memory only.");
 
 namespace paddle {
 namespace platform {
```
paddle/fluid/pybind/global_value_getter_setter.cc
```diff
@@ -29,82 +29,8 @@
 #include "paddle/fluid/platform/macros.h"
 #include "pybind11/stl.h"
-// data processing
-// NOTE: where is these 2 flags from?
-DECLARE_bool(use_mkldnn);
-DECLARE_string(tracer_mkldnn_ops_on);
-DECLARE_string(tracer_mkldnn_ops_off);
-// debug
-DECLARE_bool(check_nan_inf);
-DECLARE_bool(cpu_deterministic);
-DECLARE_bool(enable_rpc_profiler);
-DECLARE_int32(multiple_of_cupti_buffer_size);
-DECLARE_bool(reader_queue_speed_test_mode);
-DECLARE_int32(call_stack_level);
-DECLARE_bool(sort_sum_gradient);
-DECLARE_bool(check_kernel_launch);
-// device management
-DECLARE_int32(paddle_num_threads);
-// executor
-DECLARE_bool(enable_parallel_graph);
-DECLARE_string(pe_profile_fname);
-DECLARE_string(print_sub_graph_dir);
-DECLARE_bool(use_ngraph);
-// memory management
-DECLARE_string(allocator_strategy);
-DECLARE_double(eager_delete_tensor_gb);
-DECLARE_double(fraction_of_cpu_memory_to_use);
-DECLARE_bool(free_idle_chunk);
-DECLARE_bool(free_when_no_cache_hit);
-DECLARE_int32(fuse_parameter_groups_size);
-DECLARE_double(fuse_parameter_memory_size);
-DECLARE_bool(init_allocated_mem);
-DECLARE_uint64(initial_cpu_memory_in_mb);
-DECLARE_double(memory_fraction_of_eager_deletion);
-DECLARE_bool(use_pinned_memory);
-DECLARE_bool(use_system_allocator);
-// others
-DECLARE_bool(benchmark);
-DECLARE_int32(inner_op_parallelism);
-DECLARE_int32(max_inplace_grad_add);
-DECLARE_string(tracer_profile_fname);
-DECLARE_bool(apply_pass_to_program);
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-// cudnn
-DECLARE_uint64(conv_workspace_size_limit);
-DECLARE_bool(cudnn_batchnorm_spatial_persistent);
-DECLARE_bool(cudnn_deterministic);
-DECLARE_bool(cudnn_exhaustive_search);
-DECLARE_bool(conv2d_disable_cudnn);
-// data processing
-DECLARE_bool(enable_cublas_tensor_op_math);
-// device management
-DECLARE_string(selected_gpus);
-// memory management
-DECLARE_bool(eager_delete_scope);
-DECLARE_bool(fast_eager_deletion_mode);
-DECLARE_double(fraction_of_cuda_pinned_memory_to_use);
-DECLARE_double(fraction_of_gpu_memory_to_use);
-DECLARE_uint64(gpu_memory_limit_mb);
-DECLARE_uint64(initial_gpu_memory_in_mb);
-DECLARE_uint64(reallocate_gpu_memory_in_mb);
-// others
-DECLARE_bool(sync_nccl_allreduce);
-#endif
-#ifdef PADDLE_WITH_XPU
-// device management
-DECLARE_string(selected_xpus);
-#endif
-#ifdef PADDLE_WITH_ASCEND_CL
-// device management
-DECLARE_string(selected_npus);
-// set minmum loss scaling value
-DECLARE_int32(min_loss_scaling);
-#endif
 #ifdef PADDLE_WITH_DISTRIBUTE
-DECLARE_int32(rpc_send_thread_num);
 DECLARE_int32(rpc_get_thread_num);
 DECLARE_int32(rpc_prefetch_thread_num);
 #endif
@@ -181,7 +107,6 @@ class PYBIND11_HIDDEN GlobalVarGetterSetterRegistry {
     PADDLE_ENFORCE_NOT_NULL(setter,
                             platform::errors::InvalidArgument(
                                 "Setter of %s should not be null", name));
     var_infos_.insert({name, VarInfo(is_public, getter, setter)});
   }
@@ -243,81 +168,51 @@ class PYBIND11_HIDDEN GlobalVarGetterSetterRegistry {
 GlobalVarGetterSetterRegistry GlobalVarGetterSetterRegistry::instance_;
-class GlobalVarGetterSetterRegistryHelper {
- public:
-  GlobalVarGetterSetterRegistryHelper(bool is_public, bool is_writable,
-                                      const std::string &var_names)
-      : is_public_(is_public),
-        is_writable_(is_writable),
-        var_names_(SplitVarNames(var_names)) {}
-
-  template <typename... Args>
-  void Register(Args &&... args) const {
-    Impl<0, sizeof...(args) == 1, Args...>::Register(
-        is_public_, is_writable_, var_names_, std::forward<Args>(args)...);
-  }
-
- private:
-  static std::vector<std::string> SplitVarNames(const std::string &names) {
-    auto valid_char = [](char ch) { return !std::isspace(ch) && ch != ','; };
-    std::vector<std::string> ret;
-    size_t i = 0, j = 0, n = names.size();
-    while (i < n) {
-      for (; i < n && !valid_char(names[i]); ++i) {
-      }
-      for (j = i + 1; j < n && valid_char(names[j]); ++j) {
-      }
-      if (i < n && j <= n) {
-        auto substring = names.substr(i, j - i);
-        VLOG(10) << "Get substring: \"" << substring << "\"";
-        ret.emplace_back(substring);
-      }
-      i = j + 1;
-    }
-    return ret;
-  }
-
- private:
-  template <size_t kIdx, bool kIsStop, typename T, typename... Args>
-  struct Impl {
-    static void Register(bool is_public, bool is_writable,
-                         const std::vector<std::string> &var_names, T &&var,
-                         Args &&... args) {
-      PADDLE_ENFORCE_EQ(kIdx + 1 + sizeof...(args), var_names.size(),
-                        platform::errors::InvalidArgument(
-                            "Argument number not match name number"));
-      Impl<kIdx, true, T>::Register(is_public, is_writable, var_names, var);
-      Impl<kIdx + 1, sizeof...(Args) == 1, Args...>::Register(
-          is_public, is_writable, var_names, std::forward<Args>(args)...);
-    }
-  };
-
-  template <size_t kIdx, typename T>
-  struct Impl<kIdx, true, T> {
-    static void Register(bool is_public, bool is_writable,
-                         const std::vector<std::string> &var_names, T &&var) {
-      auto *instance = GlobalVarGetterSetterRegistry::MutableInstance();
-      if (is_writable) {
-        instance->Register(
-            var_names[kIdx], is_public,
-            GlobalVarGetterSetterRegistry::CreateGetter(std::forward<T>(var)),
-            GlobalVarGetterSetterRegistry::CreateSetter(&var));
-      } else {
-        instance->Register(
-            var_names[kIdx], is_public,
-            GlobalVarGetterSetterRegistry::CreateGetter(std::forward<T>(var)));
-      }
-    }
-  };
-
- private:
-  const bool is_public_;
-  const bool is_writable_;
-  const std::vector<std::string> var_names_;
-};
-
 static void RegisterGlobalVarGetterSetter();
 
 void BindGlobalValueGetterSetter(pybind11::module *module) {
@@ -338,65 +188,51 @@ void BindGlobalValueGetterSetter(pybind11::module *module) {
 }
 
 /* Public vars are designed to be writable. */
-#define REGISTER_PUBLIC_GLOBAL_VAR(...)                              \
-  do {                                                               \
-    GlobalVarGetterSetterRegistryHelper(/*is_public=*/true,          \
-                                        /*is_writable=*/true,        \
-                                        "" #__VA_ARGS__)             \
-        .Register(__VA_ARGS__);                                      \
-  } while (0)
-
-#define REGISTER_PRIVATE_GLOBAL_VAR(is_writable, ...)                \
-  do {                                                               \
-    GlobalVarGetterSetterRegistryHelper(/*is_public=*/false,         \
-                                        is_writable, "" #__VA_ARGS__) \
-        .Register(__VA_ARGS__);                                      \
-  } while (0)
+#define REGISTER_PUBLIC_GLOBAL_VAR(var)                                    \
+  do {                                                                     \
+    auto *instance = GlobalVarGetterSetterRegistry::MutableInstance();     \
+    instance->Register(#var, /*is_public=*/true,                           \
+                       GlobalVarGetterSetterRegistry::CreateGetter(var),   \
+                       GlobalVarGetterSetterRegistry::CreateSetter(&var)); \
+  } while (0)
+
+struct RegisterGetterSetterVisitor : public boost::static_visitor<void> {
+  RegisterGetterSetterVisitor(const std::string &name, bool is_public,
+                              void *value_ptr)
+      : name_(name), is_public_(is_public), value_ptr_(value_ptr) {}
+
+  template <typename T>
+  void operator()(const T &) const {
+    auto &value = *static_cast<T *>(value_ptr_);
+    auto *instance = GlobalVarGetterSetterRegistry::MutableInstance();
+    instance->Register(name_, is_public_,
+                       GlobalVarGetterSetterRegistry::CreateGetter(value),
+                       GlobalVarGetterSetterRegistry::CreateSetter(&value));
+  }
+
+ private:
+  std::string name_;
+  bool is_public_;
+  void *value_ptr_;
+};
 
 static void RegisterGlobalVarGetterSetter() {
-  REGISTER_PRIVATE_GLOBAL_VAR(/*is_writable=*/false, FLAGS_free_idle_chunk,
-                              FLAGS_free_when_no_cache_hit);
-
-  REGISTER_PUBLIC_GLOBAL_VAR(
-      FLAGS_eager_delete_tensor_gb, FLAGS_enable_parallel_graph,
-      FLAGS_allocator_strategy, FLAGS_use_system_allocator,
-      FLAGS_check_nan_inf, FLAGS_call_stack_level, FLAGS_sort_sum_gradient,
-      FLAGS_cpu_deterministic, FLAGS_enable_rpc_profiler,
-      FLAGS_multiple_of_cupti_buffer_size, FLAGS_reader_queue_speed_test_mode,
-      FLAGS_pe_profile_fname, FLAGS_print_sub_graph_dir,
-      FLAGS_fraction_of_cpu_memory_to_use, FLAGS_fuse_parameter_groups_size,
-      FLAGS_fuse_parameter_memory_size, FLAGS_init_allocated_mem,
-      FLAGS_initial_cpu_memory_in_mb, FLAGS_memory_fraction_of_eager_deletion,
-      FLAGS_use_pinned_memory, FLAGS_benchmark, FLAGS_inner_op_parallelism,
-      FLAGS_tracer_profile_fname, FLAGS_paddle_num_threads, FLAGS_use_mkldnn,
-      FLAGS_max_inplace_grad_add, FLAGS_tracer_mkldnn_ops_on,
-      FLAGS_tracer_mkldnn_ops_off, FLAGS_apply_pass_to_program);
-
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-  REGISTER_PUBLIC_GLOBAL_VAR(
-      FLAGS_gpu_memory_limit_mb, FLAGS_cudnn_deterministic,
-      FLAGS_conv_workspace_size_limit, FLAGS_cudnn_batchnorm_spatial_persistent,
-      FLAGS_cudnn_exhaustive_search, FLAGS_eager_delete_scope,
-      FLAGS_fast_eager_deletion_mode,
-      FLAGS_fraction_of_cuda_pinned_memory_to_use,
-      FLAGS_fraction_of_gpu_memory_to_use, FLAGS_initial_gpu_memory_in_mb,
-      FLAGS_reallocate_gpu_memory_in_mb, FLAGS_enable_cublas_tensor_op_math,
-      FLAGS_selected_gpus, FLAGS_sync_nccl_allreduce,
-      FLAGS_conv2d_disable_cudnn, FLAGS_check_kernel_launch);
-#endif
-#ifdef PADDLE_WITH_XPU
-  REGISTER_PUBLIC_GLOBAL_VAR(FLAGS_selected_xpus);
-#endif
-#ifdef PADDLE_WITH_ASCEND_CL
-  REGISTER_PUBLIC_GLOBAL_VAR(FLAGS_selected_npus);
-  REGISTER_PUBLIC_GLOBAL_VAR(FLAGS_min_loss_scaling);
-#endif
 #ifdef PADDLE_WITH_DITRIBUTE
-  REGISTER_PUBLIC_GLOBAL_VAR(FLAGS_rpc_send_thread_num,
-                             FLAGS_rpc_get_thread_num,
-                             FLAGS_rpc_prefetch_thread_num);
+  REGISTER_PUBLIC_GLOBAL_VAR(FLAGS_rpc_get_thread_num);
+  REGISTER_PUBLIC_GLOBAL_VAR(FLAGS_rpc_prefetch_thread_num);
 #endif
+
+  const auto &flag_map = platform::GetExportedFlagInfoMap();
+  for (const auto &pair : flag_map) {
+    const std::string &name = pair.second.name;
+    bool is_writable = pair.second.is_writable;
+    void *value_ptr = const_cast<void *>(pair.second.value_ptr);
+    const auto &default_value = pair.second.default_value;
+    RegisterGetterSetterVisitor visitor("FLAGS_" + name, is_writable,
+                                        value_ptr);
+    boost::apply_visitor(visitor, default_value);
+  }
 }
 }  // namespace pybind
 }  // namespace paddle
```
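The replacement `RegisterGlobalVarGetterSetter` no longer needs per-flag lists: it walks `GetExportedFlagInfoMap()` and uses a `boost::static_visitor` to recover each flag's static type from the variant-typed default value, so the type-erased `value_ptr` can be cast back safely. A self-contained sketch of that dispatch trick (all names here are hypothetical, not Paddle APIs):

```cpp
#include <iostream>
#include <string>
#include "boost/variant.hpp"

using ValueType = boost::variant<bool, int32_t, double, std::string>;

// Visiting the *default value* tells us which alternative T the flag holds;
// the templated operator() is instantiated with that T, which makes the cast
// of the type-erased pointer back to T* well-defined.
struct PrintVisitor : public boost::static_visitor<void> {
  explicit PrintVisitor(const void *value_ptr) : value_ptr_(value_ptr) {}

  template <typename T>
  void operator()(const T &) const {
    std::cout << *static_cast<const T *>(value_ptr_) << std::endl;
  }

  const void *value_ptr_;
};

int main() {
  double current_value = 3.5;     // the live flag storage
  ValueType default_value = 1.0;  // carries the type tag 'double'
  PrintVisitor visitor(&current_value);
  boost::apply_visitor(visitor, default_value);  // prints 3.5
  return 0;
}
```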
paddle/fluid/pybind/reader_py.cc
```diff
@@ -32,9 +32,10 @@
 #include "paddle/fluid/platform/place.h"
 #include "pybind11/stl.h"
-DEFINE_bool(reader_queue_speed_test_mode, false,
-            "If set true, the queue.pop will only get data from queue but not "
-            "remove the data from queue for speed testing");
+PADDLE_DEFINE_EXPORTED_bool(
+    reader_queue_speed_test_mode, false,
+    "If set true, the queue.pop will only get data from queue but not "
+    "remove the data from queue for speed testing");
 
 namespace paddle {
 namespace pybind {
```
python/paddle/fluid/__init__.py
```diff
@@ -176,83 +176,23 @@ def __bootstrap__():
         print('PLEASE USE OMP_NUM_THREADS WISELY.', file=sys.stderr)
 
     os.environ['OMP_NUM_THREADS'] = str(num_threads)
-    sysstr = platform.system()
-    read_env_flags = [
-        'check_nan_inf', 'convert_all_blocks', 'benchmark',
-        'eager_delete_scope', 'fraction_of_cpu_memory_to_use',
-        'initial_cpu_memory_in_mb', 'init_allocated_mem', 'paddle_num_threads',
-        'dist_threadpool_size', 'eager_delete_tensor_gb',
-        'fast_eager_deletion_mode', 'memory_fraction_of_eager_deletion',
-        'allocator_strategy', 'reader_queue_speed_test_mode',
-        'print_sub_graph_dir', 'pe_profile_fname', 'inner_op_parallelism',
-        'enable_parallel_graph', 'fuse_parameter_groups_size',
-        'multiple_of_cupti_buffer_size', 'fuse_parameter_memory_size',
-        'tracer_profile_fname', 'dygraph_debug', 'use_system_allocator',
-        'enable_unused_var_check', 'free_idle_chunk', 'free_when_no_cache_hit',
-        'call_stack_level', 'sort_sum_gradient', 'max_inplace_grad_add',
-        'apply_pass_to_program', 'new_executor_use_inplace',
-    ]
-    if 'Darwin' not in sysstr:
-        read_env_flags.append('use_pinned_memory')
-    if os.name != 'nt':
-        read_env_flags.append('cpu_deterministic')
-    if core.is_compiled_with_mkldnn():
-        read_env_flags.append('use_mkldnn')
-        read_env_flags.append('tracer_mkldnn_ops_on')
-        read_env_flags.append('tracer_mkldnn_ops_off')
-    if core.is_compiled_with_cuda():
-        read_env_flags += [
-            'fraction_of_gpu_memory_to_use', 'initial_gpu_memory_in_mb',
-            'reallocate_gpu_memory_in_mb', 'cudnn_deterministic',
-            'enable_cublas_tensor_op_math', 'conv_workspace_size_limit',
-            'cudnn_exhaustive_search', 'selected_gpus', 'sync_nccl_allreduce',
-            'cudnn_batchnorm_spatial_persistent', 'gpu_allocator_retry_time',
-            'local_exe_sub_scope_limit', 'gpu_memory_limit_mb',
-            'conv2d_disable_cudnn', 'get_host_by_name_time',
-        ]
-    if core.is_compiled_with_npu():
-        read_env_flags += [
-            'selected_npus', 'fraction_of_gpu_memory_to_use',
-            'initial_gpu_memory_in_mb', 'reallocate_gpu_memory_in_mb',
-            'gpu_memory_limit_mb', 'npu_config_path', 'get_host_by_name_time',
-            'hccl_check_nan', 'min_loss_scaling',
-        ]
+    flag_prefix = 'FLAGS_'
+    read_env_flags = [
+        key[len(flag_prefix):] for key in core.globals().keys()
+        if key.startswith(flag_prefix)
+    ]
+
+    def remove_flag_if_exists(name):
+        if name in read_env_flags:
+            read_env_flags.remove(name)
+
+    sysstr = platform.system()
+    if 'Darwin' in sysstr:
+        remove_flag_if_exists('use_pinned_memory')
+
+    if os.name == 'nt':
+        remove_flag_if_exists('cpu_deterministic')
 
     core.init_gflags(["--tryfromenv=" + ",".join(read_env_flags)])
     # Note(zhouwei25): sys may not have argv in some cases,
```