From 708bd9798d581179a702040d606e6103e065674e Mon Sep 17 00:00:00 2001
From: Zeng Jinle <32832641+sneaxiy@users.noreply.github.com>
Date: Fri, 16 Aug 2019 09:33:53 +0800
Subject: [PATCH] move_flags_to_unified_files_for_management, test=develop
 (#19224)

---
 .../framework/details/all_reduce_op_handle.cc |  10 +-
 paddle/fluid/framework/garbage_collector.cc   |  26 +--
 paddle/fluid/framework/operator.cc            |   4 +-
 paddle/fluid/framework/threadpool.cc          |   5 +-
 .../memory/allocation/allocator_strategy.cc   |   6 +-
 paddle/fluid/operators/batch_norm_op.cu       |  10 +-
 paddle/fluid/operators/conv_cudnn_op.cu.cc    |  13 +-
 paddle/fluid/operators/conv_fusion_op.cu.cc   |   4 +-
 .../operators/distributed/communicator.cc     |   7 +-
 paddle/fluid/platform/CMakeLists.txt          |   6 +-
 paddle/fluid/platform/cpu_info.cc             |  13 +-
 paddle/fluid/platform/flags.cc                | 182 ++++++++++++++++++
 paddle/fluid/platform/gpu_info.cc             |  57 +-----
 paddle/fluid/platform/init.cc                 |   3 +-
 paddle/fluid/string/CMakeLists.txt            |   6 +-
 15 files changed, 218 insertions(+), 134 deletions(-)
 create mode 100644 paddle/fluid/platform/flags.cc

diff --git a/paddle/fluid/framework/details/all_reduce_op_handle.cc b/paddle/fluid/framework/details/all_reduce_op_handle.cc
index e2a0097cb1c..46fc7a54965 100644
--- a/paddle/fluid/framework/details/all_reduce_op_handle.cc
+++ b/paddle/fluid/framework/details/all_reduce_op_handle.cc
@@ -20,13 +20,9 @@
 #include "paddle/fluid/platform/gpu_info.h"
 #include "paddle/fluid/platform/profiler.h"

-// asynchronous nccl allreduce or synchronous issue:
-// https://github.com/PaddlePaddle/Paddle/issues/15049
-// If you want to change this default value, why?(gongwb)
-DEFINE_bool(
-    sync_nccl_allreduce, true,
-    "If set true, will call `cudaStreamSynchronize(nccl_stream)`"
-    "after allreduce, this mode can get better performance in some scenarios.");
+#ifdef PADDLE_WITH_CUDA
+DECLARE_bool(sync_nccl_allreduce);
+#endif

 namespace paddle {
 namespace framework {
diff --git a/paddle/fluid/framework/garbage_collector.cc b/paddle/fluid/framework/garbage_collector.cc
index abab2fdb877..f100dc6349f 100644
--- a/paddle/fluid/framework/garbage_collector.cc
+++ b/paddle/fluid/framework/garbage_collector.cc
@@ -25,31 +25,13 @@
 #include "glog/logging.h"
 #include "paddle/fluid/framework/garbage_collector.h"

+DECLARE_double(eager_delete_tensor_gb);
+DECLARE_double(memory_fraction_of_eager_deletion);
+DECLARE_bool(fast_eager_deletion_mode);
+
 namespace paddle {
 namespace framework {

-// Disable gc by default when inference library is built
-#ifdef PADDLE_ON_INFERENCE
-static const double kDefaultEagerDeleteTensorGB = -1;
-#else
-static const double kDefaultEagerDeleteTensorGB = 0;
-#endif
-
-DEFINE_double(
-    eager_delete_tensor_gb, kDefaultEagerDeleteTensorGB,
-    "Memory size threshold (GB) when the garbage collector clear tensors."
-    "Disabled when this value is less than 0");
-
-DEFINE_bool(fast_eager_deletion_mode, true,
-            "Fast eager deletion mode. If enabled, memory would release "
-            "immediately without waiting GPU kernel ends.");
-
-DEFINE_double(memory_fraction_of_eager_deletion, 1.0,
-              "Fraction of eager deletion. If less than 1.0, all variables in "
-              "the program would be sorted according to its memory size, and "
-              "only the FLAGS_memory_fraction_of_eager_deletion of the largest "
-              "variables would be deleted.");
-
 GarbageCollector::GarbageCollector(const platform::Place &place,
                                    size_t max_memory_size)
     : max_memory_size_((std::max)(max_memory_size, static_cast<size_t>(1))) {
diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc
index 3a06b522229..02a9eb20264 100644
--- a/paddle/fluid/framework/operator.cc
+++ b/paddle/fluid/framework/operator.cc
@@ -32,9 +32,7 @@ limitations under the License. */
 #include "paddle/fluid/platform/profiler.h"

 DECLARE_bool(benchmark);
-DEFINE_bool(check_nan_inf, false,
-            "Checking whether operator produce NAN/INF or not. It will be "
-            "extremely slow so please use this flag wisely.");
+DECLARE_bool(check_nan_inf);
 DEFINE_int32(inner_op_parallelism, 0, "number of threads for inner op");
 DEFINE_bool(fast_check_nan_inf, false,
             "Fast checking NAN/INF after each operation. It will be a little"
diff --git a/paddle/fluid/framework/threadpool.cc b/paddle/fluid/framework/threadpool.cc
index d34f826c1ab..7f7f426d0e2 100644
--- a/paddle/fluid/framework/threadpool.cc
+++ b/paddle/fluid/framework/threadpool.cc
@@ -13,6 +13,8 @@ limitations under the License. */

 #include "paddle/fluid/framework/threadpool.h"

+#include
+#include
 #include "gflags/gflags.h"
 #include "paddle/fluid/platform/enforce.h"

@@ -20,8 +22,7 @@
 DEFINE_int32(io_threadpool_size, 100,
              "number of threads used for doing IO, default 100");

-DEFINE_int32(dist_threadpool_size, 0,
-             "number of threads used for distributed executed.");
+DECLARE_int32(dist_threadpool_size);

 namespace paddle {
 namespace framework {
diff --git a/paddle/fluid/memory/allocation/allocator_strategy.cc b/paddle/fluid/memory/allocation/allocator_strategy.cc
index 4e45cc4d13b..19b1380612b 100644
--- a/paddle/fluid/memory/allocation/allocator_strategy.cc
+++ b/paddle/fluid/memory/allocation/allocator_strategy.cc
@@ -17,11 +17,7 @@
 #include "glog/logging.h"
 #include "paddle/fluid/platform/enforce.h"

-DEFINE_string(allocator_strategy, "naive_best_fit",
-              "The allocation strategy. naive_best_fit means the original best "
-              "fit allocator of Fluid. "
-              "auto_growth means the experimental auto-growth allocator. "
-              "Enum in [naive_best_fit, auto_growth].");
+DECLARE_string(allocator_strategy);

 namespace paddle {
 namespace memory {
diff --git a/paddle/fluid/operators/batch_norm_op.cu b/paddle/fluid/operators/batch_norm_op.cu
index a78a6726bc5..9b50a4a61a8 100644
--- a/paddle/fluid/operators/batch_norm_op.cu
+++ b/paddle/fluid/operators/batch_norm_op.cu
@@ -23,15 +23,7 @@ limitations under the License. */
 #include "paddle/fluid/platform/cudnn_helper.h"
 #include "paddle/fluid/platform/float16.h"

-// CUDNN_BATCHNORM_SPATIAL_PERSISTENT in batchnorm. This mode can be faster in
-// some tasks because an optimized path may be selected for CUDNN_DATA_FLOAT
-// and CUDNN_DATA_HALF data types, compute capability 6.0 or higher. The
-// reason we set it to false by default is that this mode may use scaled
-// atomic integer reduction that may cause a numerical overflow for certain
-// input data range.
-DEFINE_bool(cudnn_batchnorm_spatial_persistent, false,
-            "Whether enable CUDNN_BATCHNORM_SPATIAL_PERSISTENT mode for cudnn "
-            "batch_norm, default is False.");
+DECLARE_bool(cudnn_batchnorm_spatial_persistent);

 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/operators/conv_cudnn_op.cu.cc b/paddle/fluid/operators/conv_cudnn_op.cu.cc
index ec0278e5a23..7aa1419126d 100644
--- a/paddle/fluid/operators/conv_cudnn_op.cu.cc
+++ b/paddle/fluid/operators/conv_cudnn_op.cu.cc
@@ -24,16 +24,9 @@ limitations under the License. */
 #include "paddle/fluid/platform/float16.h"
 #include "paddle/fluid/platform/profiler.h"

-DEFINE_bool(cudnn_deterministic, false,
-            "Whether allow using an autotuning algorithm for convolution "
-            "operator. The autotuning algorithm may be non-deterministic. If "
-            "true, the algorithm is deterministic.");
-DEFINE_uint64(conv_workspace_size_limit,
-              paddle::platform::kDefaultConvWorkspaceSizeLimitMB,
-              "cuDNN convolution workspace limit in MB unit.");
-DEFINE_bool(cudnn_exhaustive_search, false,
-            "Whether enable exhaustive search for cuDNN convolution or "
-            "not, default is False.");
+DECLARE_bool(cudnn_deterministic);
+DECLARE_uint64(conv_workspace_size_limit);
+DECLARE_bool(cudnn_exhaustive_search);

 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/operators/conv_fusion_op.cu.cc b/paddle/fluid/operators/conv_fusion_op.cu.cc
index d1fa7b9d5bd..9b9b3e1d8bd 100644
--- a/paddle/fluid/operators/conv_fusion_op.cu.cc
+++ b/paddle/fluid/operators/conv_fusion_op.cu.cc
@@ -16,9 +16,7 @@ limitations under the License. */
 #include "paddle/fluid/operators/conv_cudnn_op_cache.h"
 #include "paddle/fluid/platform/cudnn_helper.h"

-DEFINE_int64(cudnn_exhaustive_search_times, -1,
-             "Exhaustive search times for cuDNN convolution, "
-             "default is -1, not exhaustive search");
+DECLARE_int64(cudnn_exhaustive_search_times);

 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/operators/distributed/communicator.cc b/paddle/fluid/operators/distributed/communicator.cc
index af277d69c18..a7a761fa39a 100644
--- a/paddle/fluid/operators/distributed/communicator.cc
+++ b/paddle/fluid/operators/distributed/communicator.cc
@@ -26,18 +26,17 @@ limitations under the License. */
 #include "paddle/fluid/operators/distributed/parameter_recv.h"
 #include "paddle/fluid/operators/distributed/parameter_send.h"

+DECLARE_int32(communicator_max_merge_var_num);
+DECLARE_int32(communicator_send_queue_size);
+
 DEFINE_bool(communicator_independent_recv_thread, true,
             "use an independent to recv vars from parameter server");
-DEFINE_int32(communicator_send_queue_size, 20,
-             "queue size to recv gradient before send");
 DEFINE_int32(communicator_min_send_grad_num_before_recv, 20,
              "max grad num to send before recv parameters");
 DEFINE_int32(communicator_thread_pool_size, 5, "thread num to do send or recv");
 DEFINE_int32(communicator_send_wait_times, 5,
              "times that send thread will wait if merge num does not reach "
              "max_merge_var_num");
-DEFINE_int32(communicator_max_merge_var_num, 20,
-             "max var num to merge and send");
 DEFINE_bool(communicator_fake_rpc, false,
             "fake mode does not really send any thing");
 DEFINE_bool(communicator_merge_sparse_grad, true,
diff --git a/paddle/fluid/platform/CMakeLists.txt b/paddle/fluid/platform/CMakeLists.txt
index 575eed355df..69435793a75 100644
--- a/paddle/fluid/platform/CMakeLists.txt
+++ b/paddle/fluid/platform/CMakeLists.txt
@@ -20,10 +20,12 @@ add_custom_command(TARGET profiler_py_proto POST_BUILD
     WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
 endif(NOT WIN32)

+cc_library(flags SRCS flags.cc DEPS gflags)
+
 if(WITH_GPU)
-  nv_library(enforce SRCS enforce.cc)
+  nv_library(enforce SRCS enforce.cc DEPS flags)
 else()
-  cc_library(enforce SRCS enforce.cc)
+  cc_library(enforce SRCS enforce.cc DEPS flags)
 endif()

 cc_test(enforce_test SRCS enforce_test.cc DEPS stringpiece enforce)
diff --git a/paddle/fluid/platform/cpu_info.cc b/paddle/fluid/platform/cpu_info.cc
index bdfe260793b..b7ed66bd363 100644
--- a/paddle/fluid/platform/cpu_info.cc
+++ b/paddle/fluid/platform/cpu_info.cc
@@ -32,16 +32,9 @@ limitations under the License. */
 #include
 #include "gflags/gflags.h"

-DEFINE_double(fraction_of_cpu_memory_to_use, 1,
-              "Default use 100% of CPU memory for PaddlePaddle,"
-              "reserve the rest for page tables, etc");
-DEFINE_uint64(initial_cpu_memory_in_mb, 500ul,
-              "Initial CPU memory for PaddlePaddle, in MD unit.");
-
-DEFINE_double(
-    fraction_of_cuda_pinned_memory_to_use, 0.5,
-    "Default use 50% of CPU memory as the pinned_memory for PaddlePaddle,"
-    "reserve the rest for page tables, etc");
+DECLARE_double(fraction_of_cpu_memory_to_use);
+DECLARE_uint64(initial_cpu_memory_in_mb);
+DECLARE_double(fraction_of_cuda_pinned_memory_to_use);

 // If use_pinned_memory is true, CPUAllocator calls mlock, which
 // returns pinned and locked memory as staging areas for data exchange
diff --git a/paddle/fluid/platform/flags.cc b/paddle/fluid/platform/flags.cc
new file mode 100644
index 00000000000..b2224b05bef
--- /dev/null
+++ b/paddle/fluid/platform/flags.cc
@@ -0,0 +1,182 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "gflags/gflags.h"
+#ifdef PADDLE_WITH_CUDA
+#include "paddle/fluid/platform/cudnn_workspace_helper.h"
+#endif
+
+/**
+ * NOTE(paddle-dev): This file is designed to define all public FLAGS.
+ */
+
+/* Paddle initialization related */
+DEFINE_int32(paddle_num_threads, 1,
+             "Number of threads for each paddle instance.");
+
+/* Operator related */
+DEFINE_bool(check_nan_inf, false,
+            "Checking whether operators produce NAN/INF or not. It will be "
+            "extremely slow so please use this flag wisely.");
+
+/* CUDA related */
+#ifdef PADDLE_WITH_CUDA
+DEFINE_bool(
+    enable_cublas_tensor_op_math, false,
+    "The enable_cublas_tensor_op_math indicates whether to use Tensor Core, "
+    "but it may lose precision. Currently, there are two CUDA libraries that "
+    "use Tensor Cores, cuBLAS and cuDNN. cuBLAS uses Tensor Cores to speed up "
+    "GEMM computations (the matrices must be either half precision or single "
+    "precision); cuDNN uses Tensor Cores to speed up both convolutions (the "
+    "input and output must be half precision) and recurrent neural networks "
+    "(RNNs).");
+
+DEFINE_string(selected_gpus, "",
+              "A list of device ids separated by comma, like: 0,1,2,3. "
+              "This option is useful when doing multi process training and "
+              "each process has only one device (GPU). If you want to use "
+              "all visible devices, set this to empty string. NOTE: the "
+              "reason for doing this is that we want to use P2P communication "
+              "between GPU devices; using CUDA_VISIBLE_DEVICES supports "
+              "shared-memory communication only.");
+#endif
+
+/* CUDNN related */
+#ifdef PADDLE_WITH_CUDA
+DEFINE_bool(cudnn_deterministic, false,
+            "Whether to allow using an autotuning algorithm for the "
+            "convolution operator. The autotuning algorithm may be "
+            "non-deterministic. If true, the algorithm is deterministic.");
+
+DEFINE_uint64(conv_workspace_size_limit,
+              paddle::platform::kDefaultConvWorkspaceSizeLimitMB,
+              "cuDNN convolution workspace limit in MB unit.");
+
+DEFINE_bool(cudnn_exhaustive_search, false,
+            "Whether to enable exhaustive search for cuDNN convolution or "
+            "not, default is False.");
+
+DEFINE_int64(cudnn_exhaustive_search_times, -1,
+             "Exhaustive search times for cuDNN convolution, "
+             "default is -1, meaning no exhaustive search.");
+
+// CUDNN_BATCHNORM_SPATIAL_PERSISTENT in batchnorm. This mode can be faster in
+// some tasks because an optimized path may be selected for CUDNN_DATA_FLOAT
+// and CUDNN_DATA_HALF data types, compute capability 6.0 or higher. The
+// reason we set it to false by default is that this mode may use scaled
+// atomic integer reduction that may cause a numerical overflow for certain
+// input data range.
+DEFINE_bool(cudnn_batchnorm_spatial_persistent, false,
+            "Whether to enable CUDNN_BATCHNORM_SPATIAL_PERSISTENT mode for "
+            "cudnn batch_norm, default is False.");
+#endif
+
+/* NCCL related */
+#ifdef PADDLE_WITH_CUDA
+// asynchronous nccl allreduce or synchronous issue:
+// https://github.com/PaddlePaddle/Paddle/issues/15049
+// If you want to change this default value, why? (gongwb)
+DEFINE_bool(
+    sync_nccl_allreduce, true,
+    "If set to true, will call `cudaStreamSynchronize(nccl_stream)` after "
+    "allreduce; this mode can get better performance in some scenarios.");
+#endif
+
+/* Distributed related */
+#ifdef PADDLE_WITH_DISTRIBUTE
+DEFINE_int32(communicator_max_merge_var_num, 20,
+             "max var num to merge and send");
+DEFINE_int32(communicator_send_queue_size, 20,
+             "queue size to recv gradient before send");
+#endif
+
+DEFINE_int32(dist_threadpool_size, 0,
+             "number of threads used for distributed execution.");
+
+/* Garbage collector related */
+// Disable gc by default when inference library is built
+#ifdef PADDLE_ON_INFERENCE
+static const double kDefaultEagerDeleteTensorGB = -1;
+#else
+static const double kDefaultEagerDeleteTensorGB = 0;
+#endif
+
+DEFINE_double(
+    eager_delete_tensor_gb, kDefaultEagerDeleteTensorGB,
+    "Memory size threshold (GB) when the garbage collector clears tensors. "
+    "Disabled when this value is less than 0.");
+
+DEFINE_bool(fast_eager_deletion_mode, true,
+            "Fast eager deletion mode. If enabled, memory would be released "
+            "immediately without waiting for GPU kernels to end.");
+
+DEFINE_double(memory_fraction_of_eager_deletion, 1.0,
+              "Fraction of eager deletion. If less than 1.0, all variables in "
+              "the program would be sorted according to their memory sizes, "
+              "and only the FLAGS_memory_fraction_of_eager_deletion of the "
+              "largest variables would be deleted.");
+
+/* Allocator related */
+DEFINE_string(allocator_strategy, "naive_best_fit",
+              "The allocation strategy. naive_best_fit means the original best "
+              "fit allocator of Fluid. "
+              "auto_growth means the experimental auto-growth allocator. "
+              "Enum in [naive_best_fit, auto_growth].");
+
+DEFINE_double(fraction_of_cpu_memory_to_use, 1,
+              "Default use 100% of CPU memory for PaddlePaddle, "
+              "reserving the rest for page tables, etc");
+DEFINE_uint64(initial_cpu_memory_in_mb, 500ul,
+              "Initial CPU memory for PaddlePaddle, in MB unit.");
+
+DEFINE_double(
+    fraction_of_cuda_pinned_memory_to_use, 0.5,
+    "Default use 50% of CPU memory as the pinned_memory for PaddlePaddle, "
+    "reserving the rest for page tables, etc");
+
+#ifdef PADDLE_WITH_CUDA
+#ifndef _WIN32
+constexpr static float fraction_of_gpu_memory_to_use = 0.92f;
+#else
+// fraction_of_gpu_memory_to_use cannot be too high on windows,
+// since the win32 graphic sub-system can occupy some GPU memory
+// which may lead to insufficient memory left for paddle
+constexpr static float fraction_of_gpu_memory_to_use = 0.5f;
+#endif
+
+DEFINE_double(fraction_of_gpu_memory_to_use, fraction_of_gpu_memory_to_use,
+              "Allocate a chunk of gpu memory that is this fraction of the "
+              "total gpu memory size. Future memory usage will be allocated "
+              "from the chunk. If the chunk doesn't have enough gpu memory, "
+              "additional chunks of the same size will be requested from gpu "
+              "until the gpu has no memory left for another chunk.");
+
+DEFINE_uint64(
+    initial_gpu_memory_in_mb, 0ul,
+    "Allocate a chunk of gpu memory whose byte size is specified by "
+    "the flag. Future memory usage will be allocated from the "
+    "chunk. If the chunk doesn't have enough gpu memory, additional "
+    "chunks of the gpu memory will be requested from gpu with size "
+    "specified by FLAGS_reallocate_gpu_memory_in_mb until the gpu has "
+    "no memory left for the additional chunk. Note: if you set this "
+    "flag, the memory size set by "
+    "FLAGS_fraction_of_gpu_memory_to_use will be overridden by this "
+    "flag. If you don't set this flag, PaddlePaddle will use "
+    "FLAGS_fraction_of_gpu_memory_to_use to allocate gpu memory.");
+
+DEFINE_uint64(reallocate_gpu_memory_in_mb, 0ul,
+              "If this flag is set, Paddle will reallocate the gpu memory with "
+              "size specified by this flag. Else Paddle will reallocate by "
+              "FLAGS_fraction_of_gpu_memory_to_use.");
+#endif
diff --git a/paddle/fluid/platform/gpu_info.cc b/paddle/fluid/platform/gpu_info.cc
index 5fce95d63f9..8191d688472 100644
--- a/paddle/fluid/platform/gpu_info.cc
+++ b/paddle/fluid/platform/gpu_info.cc
@@ -21,61 +21,14 @@ limitations under the License. */
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/string/split.h"

-#ifndef _WIN32
-constexpr static float fraction_of_gpu_memory_to_use = 0.92f;
-#else
-// fraction_of_gpu_memory_to_use cannot be too high on windows,
-// since the win32 graphic sub-system can occupy some GPU memory
-// which may lead to insufficient memory left for paddle
-constexpr static float fraction_of_gpu_memory_to_use = 0.5f;
-#endif
+DECLARE_double(fraction_of_gpu_memory_to_use);
+DECLARE_uint64(initial_gpu_memory_in_mb);
+DECLARE_uint64(reallocate_gpu_memory_in_mb);
+DECLARE_bool(enable_cublas_tensor_op_math);
+DECLARE_string(selected_gpus);

 constexpr static float fraction_reserve_gpu_memory = 0.05f;

-DEFINE_double(fraction_of_gpu_memory_to_use, fraction_of_gpu_memory_to_use,
-              "Allocate a trunk of gpu memory that is this fraction of the "
-              "total gpu memory size. Future memory usage will be allocated "
-              "from the trunk. If the trunk doesn't have enough gpu memory, "
-              "additional trunks of the same size will be requested from gpu "
-              "until the gpu has no memory left for another trunk.");
-
-DEFINE_uint64(
-    initial_gpu_memory_in_mb, 0ul,
-    "Allocate a trunk of gpu memory whose byte size is specified by "
-    "the flag. Future memory usage will be allocated from the "
-    "trunk. If the trunk doesn't have enough gpu memory, additional "
-    "trunks of the gpu memory will be requested from gpu with size "
-    "specified by FLAGS_reallocate_gpu_memory_in_mb until the gpu has "
-    "no memory left for the additional trunk. Note: if you set this "
-    "flag, the memory size set by "
-    "FLAGS_fraction_of_gpu_memory_to_use will be overrided by this "
-    "flag. If you don't set this flag, PaddlePaddle will use "
-    "FLAGS_fraction_of_gpu_memory_to_use to allocate gpu memory");
-
-DEFINE_uint64(reallocate_gpu_memory_in_mb, 0ul,
-              "If this flag is set, Paddle will reallocate the gpu memory with "
-              "size specified by this flag. Else Paddle will reallocate by "
-              "FLAGS_fraction_of_gpu_memory_to_use");
-
-DEFINE_bool(
-    enable_cublas_tensor_op_math, false,
-    "The enable_cublas_tensor_op_math indicate whether to use Tensor Core, "
-    "but it may loss precision. Currently, There are two CUDA libraries that"
-    " use Tensor Cores, cuBLAS and cuDNN. cuBLAS uses Tensor Cores to speed up"
-    " GEMM computations(the matrices must be either half precision or single "
-    "precision); cuDNN uses Tensor Cores to speed up both convolutions(the "
-    "input and output must be half precision) and recurrent neural networks "
-    "(RNNs).");
-
-DEFINE_string(selected_gpus, "",
-              "A list of device ids separated by comma, like: 0,1,2,3. "
-              "This option is useful when doing multi process training and "
-              "each process have only one device (GPU). If you want to use "
-              "all visible devices, set this to empty string. NOTE: the "
-              "reason of doing this is that we want to use P2P communication"
-              "between GPU devices, use CUDA_VISIBLE_DEVICES can only use"
-              "share-memory only.");
-

 namespace paddle {
 namespace platform {
diff --git a/paddle/fluid/platform/init.cc b/paddle/fluid/platform/init.cc
index 9b7b21208eb..feb6b1e7dc1 100644
--- a/paddle/fluid/platform/init.cc
+++ b/paddle/fluid/platform/init.cc
@@ -36,8 +36,7 @@ limitations under the License. */
 #include "dgc/dgc.h"
 #endif

-DEFINE_int32(paddle_num_threads, 1,
-             "Number of threads for each paddle instance.");
+DECLARE_int32(paddle_num_threads);
 DEFINE_int32(multiple_of_cupti_buffer_size, 1,
              "Multiple of the CUPTI device buffer size. If the timestamps have "
              "been dropped when you are profiling, try increasing this value.");
diff --git a/paddle/fluid/string/CMakeLists.txt b/paddle/fluid/string/CMakeLists.txt
index 49a8fb82dbf..a465f5909a7 100644
--- a/paddle/fluid/string/CMakeLists.txt
+++ b/paddle/fluid/string/CMakeLists.txt
@@ -1,6 +1,6 @@
-cc_library(stringpiece SRCS piece.cc)
-cc_library(pretty_log SRCS pretty_log.cc)
-cc_library(string_helper SRCS string_helper.cc DEPS boost)
+cc_library(stringpiece SRCS piece.cc DEPS flags)
+cc_library(pretty_log SRCS pretty_log.cc DEPS flags)
+cc_library(string_helper SRCS string_helper.cc DEPS boost flags)
 cc_test(stringpiece_test SRCS piece_test.cc DEPS stringpiece glog gflags)
 cc_test(stringprintf_test SRCS printf_test.cc DEPS glog gflags)
 cc_test(to_string_test SRCS to_string_test.cc)
--
GitLab