diff --git a/paddle/fluid/framework/details/all_reduce_op_handle.cc b/paddle/fluid/framework/details/all_reduce_op_handle.cc index e2a0097cb1c20ef6f7987a162cbf233628fd03d6..46fc7a5496555f7fa0642d9910721b711c81d3b8 100644 --- a/paddle/fluid/framework/details/all_reduce_op_handle.cc +++ b/paddle/fluid/framework/details/all_reduce_op_handle.cc @@ -20,13 +20,9 @@ #include "paddle/fluid/platform/gpu_info.h" #include "paddle/fluid/platform/profiler.h" -// asynchronous nccl allreduce or synchronous issue: -// https://github.com/PaddlePaddle/Paddle/issues/15049 -// If you want to change this default value, why?(gongwb) -DEFINE_bool( - sync_nccl_allreduce, true, - "If set true, will call `cudaStreamSynchronize(nccl_stream)`" - "after allreduce, this mode can get better performance in some scenarios."); +#ifdef PADDLE_WITH_CUDA +DECLARE_bool(sync_nccl_allreduce); +#endif namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/garbage_collector.cc b/paddle/fluid/framework/garbage_collector.cc index abab2fdb8773e27c725890ce8ca7fcb321019c6c..f100dc6349f58260ed6c501da6148efe50437fee 100644 --- a/paddle/fluid/framework/garbage_collector.cc +++ b/paddle/fluid/framework/garbage_collector.cc @@ -25,31 +25,13 @@ #include "glog/logging.h" #include "paddle/fluid/framework/garbage_collector.h" +DECLARE_double(eager_delete_tensor_gb); +DECLARE_double(memory_fraction_of_eager_deletion); +DECLARE_bool(fast_eager_deletion_mode); + namespace paddle { namespace framework { -// Disable gc by default when inference library is built -#ifdef PADDLE_ON_INFERENCE -static const double kDefaultEagerDeleteTensorGB = -1; -#else -static const double kDefaultEagerDeleteTensorGB = 0; -#endif - -DEFINE_double( - eager_delete_tensor_gb, kDefaultEagerDeleteTensorGB, - "Memory size threshold (GB) when the garbage collector clear tensors." - "Disabled when this value is less than 0"); - -DEFINE_bool(fast_eager_deletion_mode, true, - "Fast eager deletion mode. 
If enabled, memory would release " - "immediately without waiting GPU kernel ends."); - -DEFINE_double(memory_fraction_of_eager_deletion, 1.0, - "Fraction of eager deletion. If less than 1.0, all variables in " - "the program would be sorted according to its memory size, and " - "only the FLAGS_memory_fraction_of_eager_deletion of the largest " - "variables would be deleted."); - GarbageCollector::GarbageCollector(const platform::Place &place, size_t max_memory_size) : max_memory_size_((std::max)(max_memory_size, static_cast<size_t>(1))) { diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index 3a06b5222297d033c71c6d4fe9155699c015f3ed..02a9eb20264a5de028f29c8a86f459ae5461ba9e 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -32,9 +32,7 @@ limitations under the License. */ #include "paddle/fluid/platform/profiler.h" DECLARE_bool(benchmark); -DEFINE_bool(check_nan_inf, false, - "Checking whether operator produce NAN/INF or not. It will be " - "extremely slow so please use this flag wisely."); +DECLARE_bool(check_nan_inf); DEFINE_int32(inner_op_parallelism, 0, "number of threads for inner op"); DEFINE_bool(fast_check_nan_inf, false, "Fast checking NAN/INF after each operation. It will be a little" diff --git a/paddle/fluid/framework/threadpool.cc b/paddle/fluid/framework/threadpool.cc index d34f826c1abb99198fd4dbe9537495edff7b63af..7f7f426d0e28224932fc96a3fefa0df1279e6475 100644 --- a/paddle/fluid/framework/threadpool.cc +++ b/paddle/fluid/framework/threadpool.cc @@ -13,6 +13,8 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/threadpool.h" +#include +#include #include "gflags/gflags.h" #include "paddle/fluid/platform/enforce.h" @@ -20,8 +22,7 @@ DEFINE_int32(io_threadpool_size, 100, "number of threads used for doing IO, default 100"); -DEFINE_int32(dist_threadpool_size, 0, - "number of threads used for distributed executed."); +DECLARE_int32(dist_threadpool_size); namespace paddle { namespace framework { diff --git a/paddle/fluid/memory/allocation/allocator_strategy.cc b/paddle/fluid/memory/allocation/allocator_strategy.cc index 4e45cc4d13b0d5abcb10bd9e34993bc0b8c17485..19b1380612b6de2387771e633ee0604bdc30046f 100644 --- a/paddle/fluid/memory/allocation/allocator_strategy.cc +++ b/paddle/fluid/memory/allocation/allocator_strategy.cc @@ -17,11 +17,7 @@ #include "glog/logging.h" #include "paddle/fluid/platform/enforce.h" -DEFINE_string(allocator_strategy, "naive_best_fit", - "The allocation strategy. naive_best_fit means the original best " - "fit allocator of Fluid. " - "auto_growth means the experimental auto-growth allocator. " - "Enum in [naive_best_fit, auto_growth]."); +DECLARE_string(allocator_strategy); namespace paddle { namespace memory { diff --git a/paddle/fluid/operators/batch_norm_op.cu b/paddle/fluid/operators/batch_norm_op.cu index a78a6726bc5a59cc84494656dc53e31e40eb82b3..9b50a4a61a87a61088d8c34ebcc06a2a281a01c0 100644 --- a/paddle/fluid/operators/batch_norm_op.cu +++ b/paddle/fluid/operators/batch_norm_op.cu @@ -23,15 +23,7 @@ limitations under the License. */ #include "paddle/fluid/platform/cudnn_helper.h" #include "paddle/fluid/platform/float16.h" -// CUDNN_BATCHNORM_SPATIAL_PERSISTENT in batchnorm. This mode can be faster in -// some tasks because an optimized path may be selected for CUDNN_DATA_FLOAT -// and CUDNN_DATA_HALF data types, compute capability 6.0 or higher. 
The -// reason we set it to false by default is that this mode may use scaled -// atomic integer reduction that may cause a numerical overflow for certain -// input data range. -DEFINE_bool(cudnn_batchnorm_spatial_persistent, false, - "Whether enable CUDNN_BATCHNORM_SPATIAL_PERSISTENT mode for cudnn " - "batch_norm, default is False."); +DECLARE_bool(cudnn_batchnorm_spatial_persistent); namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/conv_cudnn_op.cu.cc b/paddle/fluid/operators/conv_cudnn_op.cu.cc index ec0278e5a230ec9c5cbb38855d0c2a07912f332c..7aa1419126d31ec89fc46bbaa3b23b7516f3ab27 100644 --- a/paddle/fluid/operators/conv_cudnn_op.cu.cc +++ b/paddle/fluid/operators/conv_cudnn_op.cu.cc @@ -24,16 +24,9 @@ limitations under the License. */ #include "paddle/fluid/platform/float16.h" #include "paddle/fluid/platform/profiler.h" -DEFINE_bool(cudnn_deterministic, false, - "Whether allow using an autotuning algorithm for convolution " - "operator. The autotuning algorithm may be non-deterministic. If " - "true, the algorithm is deterministic."); -DEFINE_uint64(conv_workspace_size_limit, - paddle::platform::kDefaultConvWorkspaceSizeLimitMB, - "cuDNN convolution workspace limit in MB unit."); -DEFINE_bool(cudnn_exhaustive_search, false, - "Whether enable exhaustive search for cuDNN convolution or " - "not, default is False."); +DECLARE_bool(cudnn_deterministic); +DECLARE_uint64(conv_workspace_size_limit); +DECLARE_bool(cudnn_exhaustive_search); namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/conv_fusion_op.cu.cc b/paddle/fluid/operators/conv_fusion_op.cu.cc index d1fa7b9d5bd81b164e51cb7a5353ed1d06f221b1..9b9b3e1d8bd6e3196d34e2b0efb2e1433f3a6016 100644 --- a/paddle/fluid/operators/conv_fusion_op.cu.cc +++ b/paddle/fluid/operators/conv_fusion_op.cu.cc @@ -16,9 +16,7 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/conv_cudnn_op_cache.h" #include "paddle/fluid/platform/cudnn_helper.h" -DEFINE_int64(cudnn_exhaustive_search_times, -1, - "Exhaustive search times for cuDNN convolution, " - "default is -1, not exhaustive search"); +DECLARE_int64(cudnn_exhaustive_search_times); namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/distributed/communicator.cc b/paddle/fluid/operators/distributed/communicator.cc index af277d69c18670e31cb8fd9991b33b915261778e..a7a761fa39a7390f78b5b9b2209d12ea5ac24c30 100644 --- a/paddle/fluid/operators/distributed/communicator.cc +++ b/paddle/fluid/operators/distributed/communicator.cc @@ -26,18 +26,17 @@ limitations under the License. */ #include "paddle/fluid/operators/distributed/parameter_recv.h" #include "paddle/fluid/operators/distributed/parameter_send.h" +DECLARE_int32(communicator_max_merge_var_num); +DECLARE_int32(communicator_send_queue_size); + DEFINE_bool(communicator_independent_recv_thread, true, "use an independent to recv vars from parameter server"); -DEFINE_int32(communicator_send_queue_size, 20, - "queue size to recv gradient before send"); DEFINE_int32(communicator_min_send_grad_num_before_recv, 20, "max grad num to send before recv parameters"); DEFINE_int32(communicator_thread_pool_size, 5, "thread num to do send or recv"); DEFINE_int32(communicator_send_wait_times, 5, "times that send thread will wait if merge num does not reach " "max_merge_var_num"); -DEFINE_int32(communicator_max_merge_var_num, 20, - "max var num to merge and send"); DEFINE_bool(communicator_fake_rpc, false, "fake mode does not really send any thing"); DEFINE_bool(communicator_merge_sparse_grad, true, diff --git a/paddle/fluid/platform/CMakeLists.txt b/paddle/fluid/platform/CMakeLists.txt index 575eed355df3e07e2f13a3a3656a325caff0f9ff..69435793a75a203533806a567c718e0af4d2e20c 100644 --- a/paddle/fluid/platform/CMakeLists.txt +++ b/paddle/fluid/platform/CMakeLists.txt @@ -20,10 +20,12 @@ 
add_custom_command(TARGET profiler_py_proto POST_BUILD WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) endif(NOT WIN32) +cc_library(flags SRCS flags.cc DEPS gflags) + if(WITH_GPU) - nv_library(enforce SRCS enforce.cc) + nv_library(enforce SRCS enforce.cc DEPS flags) else() - cc_library(enforce SRCS enforce.cc) + cc_library(enforce SRCS enforce.cc DEPS flags) endif() cc_test(enforce_test SRCS enforce_test.cc DEPS stringpiece enforce) diff --git a/paddle/fluid/platform/cpu_info.cc b/paddle/fluid/platform/cpu_info.cc index bdfe260793b638881a46a8d663876eeda4ed932f..b7ed66bd36369b0b31df3afbbd18e49fba8e23e1 100644 --- a/paddle/fluid/platform/cpu_info.cc +++ b/paddle/fluid/platform/cpu_info.cc @@ -32,16 +32,9 @@ limitations under the License. */ #include #include "gflags/gflags.h" -DEFINE_double(fraction_of_cpu_memory_to_use, 1, - "Default use 100% of CPU memory for PaddlePaddle," - "reserve the rest for page tables, etc"); -DEFINE_uint64(initial_cpu_memory_in_mb, 500ul, - "Initial CPU memory for PaddlePaddle, in MD unit."); - -DEFINE_double( - fraction_of_cuda_pinned_memory_to_use, 0.5, - "Default use 50% of CPU memory as the pinned_memory for PaddlePaddle," - "reserve the rest for page tables, etc"); +DECLARE_double(fraction_of_cpu_memory_to_use); +DECLARE_uint64(initial_cpu_memory_in_mb); +DECLARE_double(fraction_of_cuda_pinned_memory_to_use); // If use_pinned_memory is true, CPUAllocator calls mlock, which // returns pinned and locked memory as staging areas for data exchange diff --git a/paddle/fluid/platform/flags.cc b/paddle/fluid/platform/flags.cc new file mode 100644 index 0000000000000000000000000000000000000000..b2224b05bef04d793cc40a4a4d30f51704b75da1 --- /dev/null +++ b/paddle/fluid/platform/flags.cc @@ -0,0 +1,182 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "gflags/gflags.h" +#ifdef PADDLE_WITH_CUDA +#include "paddle/fluid/platform/cudnn_workspace_helper.h" +#endif + +/** + * NOTE(paddle-dev): This file is designed to define all public FLAGS. + */ + +/* Paddle initialization related */ +DEFINE_int32(paddle_num_threads, 1, + "Number of threads for each paddle instance."); + +/* Operator related */ +DEFINE_bool(check_nan_inf, false, + "Checking whether operator produce NAN/INF or not. It will be " + "extremely slow so please use this flag wisely."); + +/* CUDA related */ +#ifdef PADDLE_WITH_CUDA +DEFINE_bool( + enable_cublas_tensor_op_math, false, + "The enable_cublas_tensor_op_math indicates whether to use Tensor Core, " + "but it may lose precision. Currently, there are two CUDA libraries that" + " use Tensor Cores, cuBLAS and cuDNN. cuBLAS uses Tensor Cores to speed up" + " GEMM computations(the matrices must be either half precision or single " + "precision); cuDNN uses Tensor Cores to speed up both convolutions(the " + "input and output must be half precision) and recurrent neural networks " + "(RNNs)."); + +DEFINE_string(selected_gpus, "", + "A list of device ids separated by comma, like: 0,1,2,3. " + "This option is useful when doing multi process training and " + "each process has only one device (GPU). If you want to use " + "all visible devices, set this to empty string. 
NOTE: the " + "reason of doing this is that we want to use P2P communication " + "between GPU devices, use CUDA_VISIBLE_DEVICES can only use " + "share-memory only."); +#endif + +/* CUDNN related */ +#ifdef PADDLE_WITH_CUDA +DEFINE_bool(cudnn_deterministic, false, + "Whether allow using an autotuning algorithm for convolution " + "operator. The autotuning algorithm may be non-deterministic. If " + "true, the algorithm is deterministic."); + +DEFINE_uint64(conv_workspace_size_limit, + paddle::platform::kDefaultConvWorkspaceSizeLimitMB, + "cuDNN convolution workspace limit in MB unit."); + +DEFINE_bool(cudnn_exhaustive_search, false, + "Whether enable exhaustive search for cuDNN convolution or " + "not, default is False."); + +DEFINE_int64(cudnn_exhaustive_search_times, -1, + "Exhaustive search times for cuDNN convolution, " + "default is -1, not exhaustive search"); + +// CUDNN_BATCHNORM_SPATIAL_PERSISTENT in batchnorm. This mode can be faster in +// some tasks because an optimized path may be selected for CUDNN_DATA_FLOAT +// and CUDNN_DATA_HALF data types, compute capability 6.0 or higher. The +// reason we set it to false by default is that this mode may use scaled +// atomic integer reduction that may cause a numerical overflow for certain +// input data range. 
+DEFINE_bool(cudnn_batchnorm_spatial_persistent, false, + "Whether enable CUDNN_BATCHNORM_SPATIAL_PERSISTENT mode for cudnn " + "batch_norm, default is False."); +#endif + +/* NCCL related */ +#ifdef PADDLE_WITH_CUDA +// asynchronous nccl allreduce or synchronous issue: +// https://github.com/PaddlePaddle/Paddle/issues/15049 +// If you want to change this default value, why?(gongwb) +DEFINE_bool( + sync_nccl_allreduce, true, + "If set true, will call `cudaStreamSynchronize(nccl_stream)` " + "after allreduce, this mode can get better performance in some scenarios."); +#endif + +/* Distributed related */ +#ifdef PADDLE_WITH_DISTRIBUTE +DEFINE_int32(communicator_max_merge_var_num, 20, + "max var num to merge and send"); +DEFINE_int32(communicator_send_queue_size, 20, + "queue size to recv gradient before send"); +#endif + +DEFINE_int32(dist_threadpool_size, 0, + "number of threads used for distributed execution."); + +/* Garbage collector related */ +// Disable gc by default when inference library is built +#ifdef PADDLE_ON_INFERENCE +static const double kDefaultEagerDeleteTensorGB = -1; +#else +static const double kDefaultEagerDeleteTensorGB = 0; +#endif + +DEFINE_double( + eager_delete_tensor_gb, kDefaultEagerDeleteTensorGB, + "Memory size threshold (GB) when the garbage collector clears tensors. " + "Disabled when this value is less than 0"); + +DEFINE_bool(fast_eager_deletion_mode, true, + "Fast eager deletion mode. If enabled, memory would release " + "immediately without waiting GPU kernel ends."); + +DEFINE_double(memory_fraction_of_eager_deletion, 1.0, + "Fraction of eager deletion. If less than 1.0, all variables in " + "the program would be sorted according to its memory size, and " + "only the FLAGS_memory_fraction_of_eager_deletion of the largest " + "variables would be deleted."); + +/* Allocator related */ +DEFINE_string(allocator_strategy, "naive_best_fit", + "The allocation strategy. naive_best_fit means the original best " + "fit allocator of Fluid. 
" + "auto_growth means the experimental auto-growth allocator. " + "Enum in [naive_best_fit, auto_growth]."); + +DEFINE_double(fraction_of_cpu_memory_to_use, 1, + "Default use 100% of CPU memory for PaddlePaddle, " + "reserve the rest for page tables, etc"); +DEFINE_uint64(initial_cpu_memory_in_mb, 500ul, + "Initial CPU memory for PaddlePaddle, in MB unit."); + +DEFINE_double( + fraction_of_cuda_pinned_memory_to_use, 0.5, + "Default use 50% of CPU memory as the pinned_memory for PaddlePaddle, " + "reserve the rest for page tables, etc"); + +#ifdef PADDLE_WITH_CUDA +#ifndef _WIN32 +constexpr static float fraction_of_gpu_memory_to_use = 0.92f; +#else +// fraction_of_gpu_memory_to_use cannot be too high on windows, +// since the win32 graphic sub-system can occupy some GPU memory +// which may lead to insufficient memory left for paddle +constexpr static float fraction_of_gpu_memory_to_use = 0.5f; +#endif + +DEFINE_double(fraction_of_gpu_memory_to_use, fraction_of_gpu_memory_to_use, + "Allocate a trunk of gpu memory that is this fraction of the " + "total gpu memory size. Future memory usage will be allocated " + "from the trunk. If the trunk doesn't have enough gpu memory, " + "additional trunks of the same size will be requested from gpu " + "until the gpu has no memory left for another trunk."); + +DEFINE_uint64( + initial_gpu_memory_in_mb, 0ul, + "Allocate a trunk of gpu memory whose byte size is specified by " + "the flag. Future memory usage will be allocated from the " + "trunk. If the trunk doesn't have enough gpu memory, additional " + "trunks of the gpu memory will be requested from gpu with size " + "specified by FLAGS_reallocate_gpu_memory_in_mb until the gpu has " + "no memory left for the additional trunk. Note: if you set this " + "flag, the memory size set by " + "FLAGS_fraction_of_gpu_memory_to_use will be overridden by this " + "flag. 
If you don't set this flag, PaddlePaddle will use " + "FLAGS_fraction_of_gpu_memory_to_use to allocate gpu memory"); + +DEFINE_uint64(reallocate_gpu_memory_in_mb, 0ul, + "If this flag is set, Paddle will reallocate the gpu memory with " + "size specified by this flag. Else Paddle will reallocate by " + "FLAGS_fraction_of_gpu_memory_to_use"); +#endif diff --git a/paddle/fluid/platform/gpu_info.cc b/paddle/fluid/platform/gpu_info.cc index 5fce95d63f990db091ce5f8072654f6e346b5c1c..8191d688472a3eb0f297936f3387e77809a20e2f 100644 --- a/paddle/fluid/platform/gpu_info.cc +++ b/paddle/fluid/platform/gpu_info.cc @@ -21,61 +21,14 @@ limitations under the License. */ #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/string/split.h" -#ifndef _WIN32 -constexpr static float fraction_of_gpu_memory_to_use = 0.92f; -#else -// fraction_of_gpu_memory_to_use cannot be too high on windows, -// since the win32 graphic sub-system can occupy some GPU memory -// which may lead to insufficient memory left for paddle -constexpr static float fraction_of_gpu_memory_to_use = 0.5f; -#endif +DECLARE_double(fraction_of_gpu_memory_to_use); +DECLARE_uint64(initial_gpu_memory_in_mb); +DECLARE_uint64(reallocate_gpu_memory_in_mb); +DECLARE_bool(enable_cublas_tensor_op_math); +DECLARE_string(selected_gpus); constexpr static float fraction_reserve_gpu_memory = 0.05f; -DEFINE_double(fraction_of_gpu_memory_to_use, fraction_of_gpu_memory_to_use, - "Allocate a trunk of gpu memory that is this fraction of the " - "total gpu memory size. Future memory usage will be allocated " - "from the trunk. If the trunk doesn't have enough gpu memory, " - "additional trunks of the same size will be requested from gpu " - "until the gpu has no memory left for another trunk."); - -DEFINE_uint64( - initial_gpu_memory_in_mb, 0ul, - "Allocate a trunk of gpu memory whose byte size is specified by " - "the flag. Future memory usage will be allocated from the " - "trunk. 
If the trunk doesn't have enough gpu memory, additional " - "trunks of the gpu memory will be requested from gpu with size " - "specified by FLAGS_reallocate_gpu_memory_in_mb until the gpu has " - "no memory left for the additional trunk. Note: if you set this " - "flag, the memory size set by " - "FLAGS_fraction_of_gpu_memory_to_use will be overrided by this " - "flag. If you don't set this flag, PaddlePaddle will use " - "FLAGS_fraction_of_gpu_memory_to_use to allocate gpu memory"); - -DEFINE_uint64(reallocate_gpu_memory_in_mb, 0ul, - "If this flag is set, Paddle will reallocate the gpu memory with " - "size specified by this flag. Else Paddle will reallocate by " - "FLAGS_fraction_of_gpu_memory_to_use"); - -DEFINE_bool( - enable_cublas_tensor_op_math, false, - "The enable_cublas_tensor_op_math indicate whether to use Tensor Core, " - "but it may loss precision. Currently, There are two CUDA libraries that" - " use Tensor Cores, cuBLAS and cuDNN. cuBLAS uses Tensor Cores to speed up" - " GEMM computations(the matrices must be either half precision or single " - "precision); cuDNN uses Tensor Cores to speed up both convolutions(the " - "input and output must be half precision) and recurrent neural networks " - "(RNNs)."); - -DEFINE_string(selected_gpus, "", - "A list of device ids separated by comma, like: 0,1,2,3. " - "This option is useful when doing multi process training and " - "each process have only one device (GPU). If you want to use " - "all visible devices, set this to empty string. 
NOTE: the " - "reason of doing this is that we want to use P2P communication" - "between GPU devices, use CUDA_VISIBLE_DEVICES can only use" - "share-memory only."); - namespace paddle { namespace platform { diff --git a/paddle/fluid/platform/init.cc b/paddle/fluid/platform/init.cc index 9b7b21208eb51691963ac15b90e3182f3afcf81d..feb6b1e7dc1b03189efe89e734537eb4101e68bf 100644 --- a/paddle/fluid/platform/init.cc +++ b/paddle/fluid/platform/init.cc @@ -36,8 +36,7 @@ limitations under the License. */ #include "dgc/dgc.h" #endif -DEFINE_int32(paddle_num_threads, 1, - "Number of threads for each paddle instance."); +DECLARE_int32(paddle_num_threads); DEFINE_int32(multiple_of_cupti_buffer_size, 1, "Multiple of the CUPTI device buffer size. If the timestamps have " "been dropped when you are profiling, try increasing this value."); diff --git a/paddle/fluid/string/CMakeLists.txt b/paddle/fluid/string/CMakeLists.txt index 49a8fb82dbf67357c1c3f2658538789af51b7cdc..a465f5909a7c6ee83211b8e03f1c3e7d3103022c 100644 --- a/paddle/fluid/string/CMakeLists.txt +++ b/paddle/fluid/string/CMakeLists.txt @@ -1,6 +1,6 @@ -cc_library(stringpiece SRCS piece.cc) -cc_library(pretty_log SRCS pretty_log.cc) -cc_library(string_helper SRCS string_helper.cc DEPS boost) +cc_library(stringpiece SRCS piece.cc DEPS flags) +cc_library(pretty_log SRCS pretty_log.cc DEPS flags) +cc_library(string_helper SRCS string_helper.cc DEPS boost flags) cc_test(stringpiece_test SRCS piece_test.cc DEPS stringpiece glog gflags) cc_test(stringprintf_test SRCS printf_test.cc DEPS glog gflags) cc_test(to_string_test SRCS to_string_test.cc)