Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
708bd979
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
708bd979
编写于
8月 16, 2019
作者:
Z
Zeng Jinle
提交者:
GitHub
8月 16, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
move_flags_to_unified_files_for_management, test=develop (#19224)
上级
002f325d
变更
15
隐藏空白更改
内联
并排
Showing
15 changed file
with
218 addition
and
134 deletion
+218
-134
paddle/fluid/framework/details/all_reduce_op_handle.cc
paddle/fluid/framework/details/all_reduce_op_handle.cc
+3
-7
paddle/fluid/framework/garbage_collector.cc
paddle/fluid/framework/garbage_collector.cc
+4
-22
paddle/fluid/framework/operator.cc
paddle/fluid/framework/operator.cc
+1
-3
paddle/fluid/framework/threadpool.cc
paddle/fluid/framework/threadpool.cc
+3
-2
paddle/fluid/memory/allocation/allocator_strategy.cc
paddle/fluid/memory/allocation/allocator_strategy.cc
+1
-5
paddle/fluid/operators/batch_norm_op.cu
paddle/fluid/operators/batch_norm_op.cu
+1
-9
paddle/fluid/operators/conv_cudnn_op.cu.cc
paddle/fluid/operators/conv_cudnn_op.cu.cc
+3
-10
paddle/fluid/operators/conv_fusion_op.cu.cc
paddle/fluid/operators/conv_fusion_op.cu.cc
+1
-3
paddle/fluid/operators/distributed/communicator.cc
paddle/fluid/operators/distributed/communicator.cc
+3
-4
paddle/fluid/platform/CMakeLists.txt
paddle/fluid/platform/CMakeLists.txt
+4
-2
paddle/fluid/platform/cpu_info.cc
paddle/fluid/platform/cpu_info.cc
+3
-10
paddle/fluid/platform/flags.cc
paddle/fluid/platform/flags.cc
+182
-0
paddle/fluid/platform/gpu_info.cc
paddle/fluid/platform/gpu_info.cc
+5
-52
paddle/fluid/platform/init.cc
paddle/fluid/platform/init.cc
+1
-2
paddle/fluid/string/CMakeLists.txt
paddle/fluid/string/CMakeLists.txt
+3
-3
未找到文件。
paddle/fluid/framework/details/all_reduce_op_handle.cc
浏览文件 @
708bd979
...
...
@@ -20,13 +20,9 @@
#include "paddle/fluid/platform/gpu_info.h"
#include "paddle/fluid/platform/profiler.h"
// asynchronous nccl allreduce or synchronous issue:
// https://github.com/PaddlePaddle/Paddle/issues/15049
// If you want to change this default value, why?(gongwb)
DEFINE_bool
(
sync_nccl_allreduce
,
true
,
"If set true, will call `cudaStreamSynchronize(nccl_stream)`"
"after allreduce, this mode can get better performance in some scenarios."
);
#ifdef PADDLE_WITH_CUDA
DECLARE_bool
(
sync_nccl_allreduce
);
#endif
namespace
paddle
{
namespace
framework
{
...
...
paddle/fluid/framework/garbage_collector.cc
浏览文件 @
708bd979
...
...
@@ -25,31 +25,13 @@
#include "glog/logging.h"
#include "paddle/fluid/framework/garbage_collector.h"
DECLARE_double
(
eager_delete_tensor_gb
);
DECLARE_double
(
memory_fraction_of_eager_deletion
);
DECLARE_bool
(
fast_eager_deletion_mode
);
namespace
paddle
{
namespace
framework
{
// Disable gc by default when inference library is built
#ifdef PADDLE_ON_INFERENCE
static
const
double
kDefaultEagerDeleteTensorGB
=
-
1
;
#else
static
const
double
kDefaultEagerDeleteTensorGB
=
0
;
#endif
DEFINE_double
(
eager_delete_tensor_gb
,
kDefaultEagerDeleteTensorGB
,
"Memory size threshold (GB) when the garbage collector clear tensors."
"Disabled when this value is less than 0"
);
DEFINE_bool
(
fast_eager_deletion_mode
,
true
,
"Fast eager deletion mode. If enabled, memory would release "
"immediately without waiting GPU kernel ends."
);
DEFINE_double
(
memory_fraction_of_eager_deletion
,
1.0
,
"Fraction of eager deletion. If less than 1.0, all variables in "
"the program would be sorted according to its memory size, and "
"only the FLAGS_memory_fraction_of_eager_deletion of the largest "
"variables would be deleted."
);
GarbageCollector
::
GarbageCollector
(
const
platform
::
Place
&
place
,
size_t
max_memory_size
)
:
max_memory_size_
((
std
::
max
)(
max_memory_size
,
static_cast
<
size_t
>
(
1
)))
{
...
...
paddle/fluid/framework/operator.cc
浏览文件 @
708bd979
...
...
@@ -32,9 +32,7 @@ limitations under the License. */
#include "paddle/fluid/platform/profiler.h"
DECLARE_bool
(
benchmark
);
DEFINE_bool
(
check_nan_inf
,
false
,
"Checking whether operator produce NAN/INF or not. It will be "
"extremely slow so please use this flag wisely."
);
DECLARE_bool
(
check_nan_inf
);
DEFINE_int32
(
inner_op_parallelism
,
0
,
"number of threads for inner op"
);
DEFINE_bool
(
fast_check_nan_inf
,
false
,
"Fast checking NAN/INF after each operation. It will be a little"
...
...
paddle/fluid/framework/threadpool.cc
浏览文件 @
708bd979
...
...
@@ -13,6 +13,8 @@
limitations under the License. */
#include "paddle/fluid/framework/threadpool.h"
#include <memory>
#include <utility>
#include "gflags/gflags.h"
#include "paddle/fluid/platform/enforce.h"
...
...
@@ -20,8 +22,7 @@
DEFINE_int32
(
io_threadpool_size
,
100
,
"number of threads used for doing IO, default 100"
);
DEFINE_int32
(
dist_threadpool_size
,
0
,
"number of threads used for distributed executed."
);
DECLARE_int32
(
dist_threadpool_size
);
namespace
paddle
{
namespace
framework
{
...
...
paddle/fluid/memory/allocation/allocator_strategy.cc
浏览文件 @
708bd979
...
...
@@ -17,11 +17,7 @@
#include "glog/logging.h"
#include "paddle/fluid/platform/enforce.h"
DEFINE_string
(
allocator_strategy
,
"naive_best_fit"
,
"The allocation strategy. naive_best_fit means the original best "
"fit allocator of Fluid. "
"auto_growth means the experimental auto-growth allocator. "
"Enum in [naive_best_fit, auto_growth]."
);
DECLARE_string
(
allocator_strategy
);
namespace
paddle
{
namespace
memory
{
...
...
paddle/fluid/operators/batch_norm_op.cu
浏览文件 @
708bd979
...
...
@@ -23,15 +23,7 @@ limitations under the License. */
#include "paddle/fluid/platform/cudnn_helper.h"
#include "paddle/fluid/platform/float16.h"
// CUDNN_BATCHNORM_SPATIAL_PERSISTENT in batchnorm. This mode can be faster in
// some tasks because an optimized path may be selected for CUDNN_DATA_FLOAT
// and CUDNN_DATA_HALF data types, compute capability 6.0 or higher. The
// reason we set it to false by default is that this mode may use scaled
// atomic integer reduction that may cause a numerical overflow for certain
// input data range.
DEFINE_bool
(
cudnn_batchnorm_spatial_persistent
,
false
,
"Whether enable CUDNN_BATCHNORM_SPATIAL_PERSISTENT mode for cudnn "
"batch_norm, default is False."
);
DECLARE_bool
(
cudnn_batchnorm_spatial_persistent
);
namespace
paddle
{
namespace
operators
{
...
...
paddle/fluid/operators/conv_cudnn_op.cu.cc
浏览文件 @
708bd979
...
...
@@ -24,16 +24,9 @@ limitations under the License. */
#include "paddle/fluid/platform/float16.h"
#include "paddle/fluid/platform/profiler.h"
DEFINE_bool
(
cudnn_deterministic
,
false
,
"Whether allow using an autotuning algorithm for convolution "
"operator. The autotuning algorithm may be non-deterministic. If "
"true, the algorithm is deterministic."
);
DEFINE_uint64
(
conv_workspace_size_limit
,
paddle
::
platform
::
kDefaultConvWorkspaceSizeLimitMB
,
"cuDNN convolution workspace limit in MB unit."
);
DEFINE_bool
(
cudnn_exhaustive_search
,
false
,
"Whether enable exhaustive search for cuDNN convolution or "
"not, default is False."
);
DECLARE_bool
(
cudnn_deterministic
);
DECLARE_uint64
(
conv_workspace_size_limit
);
DECLARE_bool
(
cudnn_exhaustive_search
);
namespace
paddle
{
namespace
operators
{
...
...
paddle/fluid/operators/conv_fusion_op.cu.cc
浏览文件 @
708bd979
...
...
@@ -16,9 +16,7 @@ limitations under the License. */
#include "paddle/fluid/operators/conv_cudnn_op_cache.h"
#include "paddle/fluid/platform/cudnn_helper.h"
DEFINE_int64
(
cudnn_exhaustive_search_times
,
-
1
,
"Exhaustive search times for cuDNN convolution, "
"default is -1, not exhaustive search"
);
DECLARE_int64
(
cudnn_exhaustive_search_times
);
namespace
paddle
{
namespace
operators
{
...
...
paddle/fluid/operators/distributed/communicator.cc
浏览文件 @
708bd979
...
...
@@ -26,18 +26,17 @@ limitations under the License. */
#include "paddle/fluid/operators/distributed/parameter_recv.h"
#include "paddle/fluid/operators/distributed/parameter_send.h"
DECLARE_int32
(
communicator_max_merge_var_num
);
DECLARE_int32
(
communicator_send_queue_size
);
DEFINE_bool
(
communicator_independent_recv_thread
,
true
,
"use an independent to recv vars from parameter server"
);
DEFINE_int32
(
communicator_send_queue_size
,
20
,
"queue size to recv gradient before send"
);
DEFINE_int32
(
communicator_min_send_grad_num_before_recv
,
20
,
"max grad num to send before recv parameters"
);
DEFINE_int32
(
communicator_thread_pool_size
,
5
,
"thread num to do send or recv"
);
DEFINE_int32
(
communicator_send_wait_times
,
5
,
"times that send thread will wait if merge num does not reach "
"max_merge_var_num"
);
DEFINE_int32
(
communicator_max_merge_var_num
,
20
,
"max var num to merge and send"
);
DEFINE_bool
(
communicator_fake_rpc
,
false
,
"fake mode does not really send any thing"
);
DEFINE_bool
(
communicator_merge_sparse_grad
,
true
,
...
...
paddle/fluid/platform/CMakeLists.txt
浏览文件 @
708bd979
...
...
@@ -20,10 +20,12 @@ add_custom_command(TARGET profiler_py_proto POST_BUILD
WORKING_DIRECTORY
${
CMAKE_CURRENT_BINARY_DIR
}
)
endif
(
NOT WIN32
)
cc_library
(
flags SRCS flags.cc DEPS gflags
)
if
(
WITH_GPU
)
nv_library
(
enforce SRCS enforce.cc
)
nv_library
(
enforce SRCS enforce.cc
DEPS flags
)
else
()
cc_library
(
enforce SRCS enforce.cc
)
cc_library
(
enforce SRCS enforce.cc
DEPS flags
)
endif
()
cc_test
(
enforce_test SRCS enforce_test.cc DEPS stringpiece enforce
)
...
...
paddle/fluid/platform/cpu_info.cc
浏览文件 @
708bd979
...
...
@@ -32,16 +32,9 @@ limitations under the License. */
#include <algorithm>
#include "gflags/gflags.h"
DEFINE_double
(
fraction_of_cpu_memory_to_use
,
1
,
"Default use 100% of CPU memory for PaddlePaddle,"
"reserve the rest for page tables, etc"
);
DEFINE_uint64
(
initial_cpu_memory_in_mb
,
500ul
,
"Initial CPU memory for PaddlePaddle, in MD unit."
);
DEFINE_double
(
fraction_of_cuda_pinned_memory_to_use
,
0.5
,
"Default use 50% of CPU memory as the pinned_memory for PaddlePaddle,"
"reserve the rest for page tables, etc"
);
DECLARE_double
(
fraction_of_cpu_memory_to_use
);
DECLARE_uint64
(
initial_cpu_memory_in_mb
);
DECLARE_double
(
fraction_of_cuda_pinned_memory_to_use
);
// If use_pinned_memory is true, CPUAllocator calls mlock, which
// returns pinned and locked memory as staging areas for data exchange
...
...
paddle/fluid/platform/flags.cc
0 → 100644
浏览文件 @
708bd979
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "gflags/gflags.h"
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/cudnn_workspace_helper.h"
#endif
/**
* NOTE(paddle-dev): This file is designed to define all public FLAGS.
*/
/* Paddle initialization related */
// Registers the int32 gflag `paddle_num_threads`; its default value is 1.
DEFINE_int32(paddle_num_threads, 1,
             "Number of threads for each paddle instance.");
/* Operator related */
// Registers the bool gflag `check_nan_inf`, which is off (false) by default.
// Per its help text, turning it on makes execution extremely slow.
DEFINE_bool(check_nan_inf, false,
            "Checking whether operator produce NAN/INF or not. It will be "
            "extremely slow so please use this flag wisely.");
/* CUDA related */
#ifdef PADDLE_WITH_CUDA
// Registers the bool gflag `enable_cublas_tensor_op_math` (default: false).
// The help text describes it as the switch for using Tensor Cores, with a
// possible loss of precision.
DEFINE_bool(
    enable_cublas_tensor_op_math, false,
    "The enable_cublas_tensor_op_math indicate whether to use Tensor Core, "
    "but it may loss precision. Currently, There are two CUDA libraries that"
    " use Tensor Cores, cuBLAS and cuDNN. cuBLAS uses Tensor Cores to speed up"
    " GEMM computations(the matrices must be either half precision or single "
    "precision); cuDNN uses Tensor Cores to speed up both convolutions(the "
    "input and output must be half precision) and recurrent neural networks "
    "(RNNs).");
// Registers the string gflag `selected_gpus` (default: empty string).
// Per the help text, the value is a comma-separated list of device ids and
// an empty string means "use all visible devices".
//
// Adjacent string literals are concatenated verbatim by the compiler, so
// every fragment that ends mid-sentence must carry its own trailing space.
DEFINE_string(selected_gpus, "",
              "A list of device ids separated by comma, like: 0,1,2,3. "
              "This option is useful when doing multi process training and "
              "each process have only one device (GPU). If you want to use "
              "all visible devices, set this to empty string. NOTE: the "
              "reason of doing this is that we want to use P2P communication "
              "between GPU devices, use CUDA_VISIBLE_DEVICES can only use "
              "share-memory only.");
#endif
/* CUDNN related */
#ifdef PADDLE_WITH_CUDA
// Registers the bool gflag `cudnn_deterministic` (default: false). According
// to the help text, setting it to true makes the convolution algorithm
// deterministic.
DEFINE_bool(cudnn_deterministic, false,
            "Whether allow using an autotuning algorithm for convolution "
            "operator. The autotuning algorithm may be non-deterministic. If "
            "true, the algorithm is deterministic.");
// Registers the uint64 gflag `conv_workspace_size_limit`, expressed in MB.
// Its default is paddle::platform::kDefaultConvWorkspaceSizeLimitMB.
DEFINE_uint64(conv_workspace_size_limit,
              paddle::platform::kDefaultConvWorkspaceSizeLimitMB,
              "cuDNN convolution workspace limit in MB unit.");
// Registers the bool gflag `cudnn_exhaustive_search` (default: false), which
// toggles exhaustive search for cuDNN convolution.
DEFINE_bool(cudnn_exhaustive_search, false,
            "Whether enable exhaustive search for cuDNN convolution or "
            "not, default is False.");
// Registers the int64 gflag `cudnn_exhaustive_search_times`. The default of
// -1 means "no exhaustive search" according to the help text.
DEFINE_int64(cudnn_exhaustive_search_times, -1,
             "Exhaustive search times for cuDNN convolution, "
             "default is -1, not exhaustive search");
// Controls the CUDNN_BATCHNORM_SPATIAL_PERSISTENT mode of batch norm.
//  * Benefit: for CUDNN_DATA_FLOAT and CUDNN_DATA_HALF data on compute
//    capability 6.0 or higher, an optimized path may be chosen, which can be
//    faster in some tasks.
//  * Risk: the mode may use scaled atomic integer reduction, which can
//    overflow numerically for certain input data ranges. That is why the
//    flag defaults to false.
DEFINE_bool(cudnn_batchnorm_spatial_persistent, false,
            "Whether enable CUDNN_BATCHNORM_SPATIAL_PERSISTENT mode for cudnn "
            "batch_norm, default is False.");
#endif
/* NCCL related */
#ifdef PADDLE_WITH_CUDA
// asynchronous nccl allreduce or synchronous issue:
// https://github.com/PaddlePaddle/Paddle/issues/15049
// If you want to change this default value, why?(gongwb)
//
// Registers the bool gflag `sync_nccl_allreduce` (default: true).
// The first help-string fragment ends with a space so that the concatenated
// text reads "...(nccl_stream)` after allreduce" rather than running the
// two words together.
DEFINE_bool(
    sync_nccl_allreduce, true,
    "If set true, will call `cudaStreamSynchronize(nccl_stream)` "
    "after allreduce, this mode can get better performance in some scenarios.");
#endif
/* Distributed related */
#ifdef PADDLE_WITH_DISTRIBUTE
// Registers the int32 gflag `communicator_max_merge_var_num` (default: 20).
DEFINE_int32(communicator_max_merge_var_num, 20,
             "max var num to merge and send");
// Registers the int32 gflag `communicator_send_queue_size` (default: 20).
DEFINE_int32(communicator_send_queue_size, 20,
             "queue size to recv gradient before send");
#endif
// Registers the int32 gflag `dist_threadpool_size` (default: 0).
DEFINE_int32(dist_threadpool_size, 0,
             "number of threads used for distributed executed.");
/* Garbage collector related */
// Default value for the eager_delete_tensor_gb flag.
// When PADDLE_ON_INFERENCE is defined (inference library build) the value is
// -1, which disables gc by default; otherwise it is 0.
static const double kDefaultEagerDeleteTensorGB =
#ifdef PADDLE_ON_INFERENCE
    -1;
#else
    0;
#endif
// Registers the double gflag `eager_delete_tensor_gb`, whose default is
// kDefaultEagerDeleteTensorGB. Per the help text, a value below 0 disables
// the garbage collector.
//
// The first help-string fragment ends with a space so the two sentences do
// not run together ("tensors.Disabled") after literal concatenation.
DEFINE_double(
    eager_delete_tensor_gb, kDefaultEagerDeleteTensorGB,
    "Memory size threshold (GB) when the garbage collector clear tensors. "
    "Disabled when this value is less than 0");
// Registers the bool gflag `fast_eager_deletion_mode` (default: true). Per
// the help text, when enabled memory is released immediately without
// waiting for the GPU kernel to end.
DEFINE_bool(fast_eager_deletion_mode, true,
            "Fast eager deletion mode. If enabled, memory would release "
            "immediately without waiting GPU kernel ends.");
// Registers the double gflag `memory_fraction_of_eager_deletion`
// (default: 1.0). Per the help text, a value below 1.0 restricts eager
// deletion to that fraction of the largest variables.
DEFINE_double(memory_fraction_of_eager_deletion, 1.0,
              "Fraction of eager deletion. If less than 1.0, all variables in "
              "the program would be sorted according to its memory size, and "
              "only the FLAGS_memory_fraction_of_eager_deletion of the largest "
              "variables would be deleted.");
/* Allocator related */
// Registers the string gflag `allocator_strategy`, defaulting to
// "naive_best_fit". The help text lists the accepted values as
// naive_best_fit and auto_growth.
DEFINE_string(allocator_strategy, "naive_best_fit",
              "The allocation strategy. naive_best_fit means the original best "
              "fit allocator of Fluid. "
              "auto_growth means the experimental auto-growth allocator. "
              "Enum in [naive_best_fit, auto_growth].");
// Registers the double gflag `fraction_of_cpu_memory_to_use` (default: 1).
//
// The first help-string fragment ends with a space: adjacent literals are
// concatenated as-is, so without it the text reads "PaddlePaddle,reserve".
DEFINE_double(fraction_of_cpu_memory_to_use, 1,
              "Default use 100% of CPU memory for PaddlePaddle, "
              "reserve the rest for page tables, etc");
// Registers the uint64 gflag `initial_cpu_memory_in_mb` (default: 500).
// The help text names the unit as MB, matching the "_in_mb" suffix of the
// flag name (it previously read "MD").
DEFINE_uint64(initial_cpu_memory_in_mb, 500ul,
              "Initial CPU memory for PaddlePaddle, in MB unit.");
// Registers the double gflag `fraction_of_cuda_pinned_memory_to_use`
// (default: 0.5).
//
// The first help-string fragment ends with a space: adjacent literals are
// concatenated as-is, so without it the text reads "PaddlePaddle,reserve".
DEFINE_double(
    fraction_of_cuda_pinned_memory_to_use, 0.5,
    "Default use 50% of CPU memory as the pinned_memory for PaddlePaddle, "
    "reserve the rest for page tables, etc");
#ifdef PADDLE_WITH_CUDA
// Platform-dependent constant that is passed as the default value to
// DEFINE_double(fraction_of_gpu_memory_to_use, ...).
// The `f` suffix must be written directly after the digits: a suffix on its
// own line is parsed as a separate identifier and the definition is
// ill-formed.
#ifndef _WIN32
constexpr static float fraction_of_gpu_memory_to_use = 0.92f;
#else
// fraction_of_gpu_memory_to_use cannot be too high on windows,
// since the win32 graphic sub-system can occupy some GPU memory
// which may lead to insufficient memory left for paddle
constexpr static float fraction_of_gpu_memory_to_use = 0.5f;
#endif
// Registers the double gflag `fraction_of_gpu_memory_to_use`. The second
// macro argument is the file-level constant of the same name, which
// supplies the default value.
DEFINE_double(
    fraction_of_gpu_memory_to_use, fraction_of_gpu_memory_to_use,
    "Allocate a trunk of gpu memory that is this fraction of the "
    "total gpu memory size. Future memory usage will be allocated "
    "from the trunk. If the trunk doesn't have enough gpu memory, "
    "additional trunks of the same size will be requested from gpu "
    "until the gpu has no memory left for another trunk.");
// Registers the uint64 gflag `initial_gpu_memory_in_mb` (default: 0).
// Per the help text, setting this flag takes precedence over
// FLAGS_fraction_of_gpu_memory_to_use, and leaving it unset makes that
// fraction flag determine the allocation size.
// The help text spells "overridden" correctly (it previously read
// "overrided").
DEFINE_uint64(
    initial_gpu_memory_in_mb, 0ul,
    "Allocate a trunk of gpu memory whose byte size is specified by "
    "the flag. Future memory usage will be allocated from the "
    "trunk. If the trunk doesn't have enough gpu memory, additional "
    "trunks of the gpu memory will be requested from gpu with size "
    "specified by FLAGS_reallocate_gpu_memory_in_mb until the gpu has "
    "no memory left for the additional trunk. Note: if you set this "
    "flag, the memory size set by "
    "FLAGS_fraction_of_gpu_memory_to_use will be overridden by this "
    "flag. If you don't set this flag, PaddlePaddle will use "
    "FLAGS_fraction_of_gpu_memory_to_use to allocate gpu memory");
// Registers the uint64 gflag `reallocate_gpu_memory_in_mb` (default: 0).
// Per the help text, when it is not set reallocation falls back to
// FLAGS_fraction_of_gpu_memory_to_use.
DEFINE_uint64(reallocate_gpu_memory_in_mb, 0ul,
              "If this flag is set, Paddle will reallocate the gpu memory with "
              "size specified by this flag. Else Paddle will reallocate by "
              "FLAGS_fraction_of_gpu_memory_to_use");
#endif
paddle/fluid/platform/gpu_info.cc
浏览文件 @
708bd979
...
...
@@ -21,61 +21,14 @@ limitations under the License. */
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/string/split.h"
#ifndef _WIN32
constexpr
static
float
fraction_of_gpu_memory_to_use
=
0.92
f
;
#else
// fraction_of_gpu_memory_to_use cannot be too high on windows,
// since the win32 graphic sub-system can occupy some GPU memory
// which may lead to insufficient memory left for paddle
constexpr
static
float
fraction_of_gpu_memory_to_use
=
0.5
f
;
#endif
DECLARE_double
(
fraction_of_gpu_memory_to_use
);
DECLARE_uint64
(
initial_gpu_memory_in_mb
);
DECLARE_uint64
(
reallocate_gpu_memory_in_mb
);
DECLARE_bool
(
enable_cublas_tensor_op_math
);
DECLARE_string
(
selected_gpus
);
constexpr
static
float
fraction_reserve_gpu_memory
=
0.05
f
;
DEFINE_double
(
fraction_of_gpu_memory_to_use
,
fraction_of_gpu_memory_to_use
,
"Allocate a trunk of gpu memory that is this fraction of the "
"total gpu memory size. Future memory usage will be allocated "
"from the trunk. If the trunk doesn't have enough gpu memory, "
"additional trunks of the same size will be requested from gpu "
"until the gpu has no memory left for another trunk."
);
DEFINE_uint64
(
initial_gpu_memory_in_mb
,
0ul
,
"Allocate a trunk of gpu memory whose byte size is specified by "
"the flag. Future memory usage will be allocated from the "
"trunk. If the trunk doesn't have enough gpu memory, additional "
"trunks of the gpu memory will be requested from gpu with size "
"specified by FLAGS_reallocate_gpu_memory_in_mb until the gpu has "
"no memory left for the additional trunk. Note: if you set this "
"flag, the memory size set by "
"FLAGS_fraction_of_gpu_memory_to_use will be overrided by this "
"flag. If you don't set this flag, PaddlePaddle will use "
"FLAGS_fraction_of_gpu_memory_to_use to allocate gpu memory"
);
DEFINE_uint64
(
reallocate_gpu_memory_in_mb
,
0ul
,
"If this flag is set, Paddle will reallocate the gpu memory with "
"size specified by this flag. Else Paddle will reallocate by "
"FLAGS_fraction_of_gpu_memory_to_use"
);
DEFINE_bool
(
enable_cublas_tensor_op_math
,
false
,
"The enable_cublas_tensor_op_math indicate whether to use Tensor Core, "
"but it may loss precision. Currently, There are two CUDA libraries that"
" use Tensor Cores, cuBLAS and cuDNN. cuBLAS uses Tensor Cores to speed up"
" GEMM computations(the matrices must be either half precision or single "
"precision); cuDNN uses Tensor Cores to speed up both convolutions(the "
"input and output must be half precision) and recurrent neural networks "
"(RNNs)."
);
DEFINE_string
(
selected_gpus
,
""
,
"A list of device ids separated by comma, like: 0,1,2,3. "
"This option is useful when doing multi process training and "
"each process have only one device (GPU). If you want to use "
"all visible devices, set this to empty string. NOTE: the "
"reason of doing this is that we want to use P2P communication"
"between GPU devices, use CUDA_VISIBLE_DEVICES can only use"
"share-memory only."
);
namespace
paddle
{
namespace
platform
{
...
...
paddle/fluid/platform/init.cc
浏览文件 @
708bd979
...
...
@@ -36,8 +36,7 @@ limitations under the License. */
#include "dgc/dgc.h"
#endif
DEFINE_int32
(
paddle_num_threads
,
1
,
"Number of threads for each paddle instance."
);
DECLARE_int32
(
paddle_num_threads
);
DEFINE_int32
(
multiple_of_cupti_buffer_size
,
1
,
"Multiple of the CUPTI device buffer size. If the timestamps have "
"been dropped when you are profiling, try increasing this value."
);
...
...
paddle/fluid/string/CMakeLists.txt
浏览文件 @
708bd979
cc_library
(
stringpiece SRCS piece.cc
)
cc_library
(
pretty_log SRCS pretty_log.cc
)
cc_library
(
string_helper SRCS string_helper.cc DEPS boost
)
cc_library
(
stringpiece SRCS piece.cc
DEPS flags
)
cc_library
(
pretty_log SRCS pretty_log.cc
DEPS flags
)
cc_library
(
string_helper SRCS string_helper.cc DEPS boost
flags
)
cc_test
(
stringpiece_test SRCS piece_test.cc DEPS stringpiece glog gflags
)
cc_test
(
stringprintf_test SRCS printf_test.cc DEPS glog gflags
)
cc_test
(
to_string_test SRCS to_string_test.cc
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录