Summary of this patch (free text ahead of the first file header; patch tools
skip it):

  * paddle/fluid/platform/cpu_info.cc
      - The default of the initial_cpu_memory_in_mb flag becomes 5000 when
        PADDLE_WITH_MKLDNN is defined and stays 500 otherwise.
      - CpuMaxAllocSize() is no longer capped by initial_cpu_memory_in_mb; it
        returns FLAGS_fraction_of_cpu_memory_to_use * CpuTotalPhysicalMemory().
      - CpuMaxChunkSize() now returns the smaller of CpuMaxAllocSize() / 32 and
        initial_cpu_memory_in_mb converted to bytes.
  * paddle/testing/paddle_gtest_main.cc
      - In the #else branch, initial_cpu_memory_in_mb is appended to both the
        --tryfromenv and the --undefok argument strings.
  * python/paddle/fluid/__init__.py
      - 'initial_cpu_memory_in_mb' is appended to read_env_flags in
        __bootstrap__().

Review notes:
  * "FLAGS_initial_cpu_memory_in_mb * 1 << 20" parses as
    "(FLAGS_initial_cpu_memory_in_mb * 1) << 20" because "*" binds tighter
    than "<<", so the value is the flag shifted left by 20 bits (MB -> bytes).
  * NOTE(review): "in MD unit" in the flag help text looks like a typo for
    "MB". It is left untouched here so the added lines still match the
    post-image blob recorded on the index line; fix it in a follow-up.
  * CpuMaxAllocSize() now relies on an implicit floating-point to size_t
    conversion in its return statement.

diff --git a/paddle/fluid/platform/cpu_info.cc b/paddle/fluid/platform/cpu_info.cc
index 40dc7c9a0b6a40f2419ace3ce7e0e5e82bc95c1a..f832d72b53e8d06a32d5c0ac2ecf7130aa28a666 100644
--- a/paddle/fluid/platform/cpu_info.cc
+++ b/paddle/fluid/platform/cpu_info.cc
@@ -28,9 +28,15 @@ DEFINE_double(fraction_of_cpu_memory_to_use, 1,
               "Default use 100% of CPU memory for PaddlePaddle,"
               "reserve the rest for page tables, etc");
 
-DEFINE_uint64(
-    initial_cpu_memory_in_mb, 500,
-    "Default initial 500MB of CPU memory for PaddlePaddle, in MD unit.");
+DEFINE_uint64(initial_cpu_memory_in_mb,
+#ifdef PADDLE_WITH_MKLDNN
+              /* Aligned with mozga-intel, MKLDNN need at least 5000 MB
+               * to obtain the best performance*/
+              5000,
+#else
+              500,
+#endif
+              "Initial CPU memory for PaddlePaddle, in MD unit.");
 
 DEFINE_double(
     fraction_of_cuda_pinned_memory_to_use, 0.5,
@@ -59,10 +65,7 @@ inline size_t CpuTotalPhysicalMemory() {
 size_t CpuMaxAllocSize() {
   // For distributed systems, it requires configuring and limiting
   // the fraction of memory to use.
-  return std::min(
-      static_cast<size_t>(FLAGS_fraction_of_cpu_memory_to_use *
-                          CpuTotalPhysicalMemory()),
-      static_cast<size_t>(FLAGS_initial_cpu_memory_in_mb * 1 << 20));
+  return FLAGS_fraction_of_cpu_memory_to_use * CpuTotalPhysicalMemory();
 }
 
 size_t CpuMinChunkSize() {
@@ -71,8 +74,11 @@ size_t CpuMinChunkSize() {
 }
 
 size_t CpuMaxChunkSize() {
-  // Allow to allocate the maximum chunk size is roughly 3% of CPU memory.
-  return CpuMaxAllocSize() / 32;
+  // Allow to allocate the maximum chunk size is roughly 3% of CPU memory,
+  // or the initial_cpu_memory_in_mb.
+  return std::min(
+      static_cast<size_t>(CpuMaxAllocSize() / 32),
+      static_cast<size_t>(FLAGS_initial_cpu_memory_in_mb * 1 << 20));
 }
 
 size_t CUDAPinnedMaxAllocSize() {
diff --git a/paddle/testing/paddle_gtest_main.cc b/paddle/testing/paddle_gtest_main.cc
index 7772dc97f5c1a9e024e0fbbc310b6d7c388d4cd5..555be3d00e2dc467eec45210cc997779827ed69f 100644
--- a/paddle/testing/paddle_gtest_main.cc
+++ b/paddle/testing/paddle_gtest_main.cc
@@ -30,8 +30,9 @@ int main(int argc, char** argv) {
   new_argv.push_back(
       strdup("--tryfromenv=fraction_of_gpu_memory_to_use,use_pinned_memory"));
 #else
-  new_argv.push_back(strdup("--tryfromenv=use_pinned_memory,use_mkldnn"));
-  new_argv.push_back(strdup("--undefok=use_mkldnn"));
+  new_argv.push_back(strdup(
+      "--tryfromenv=use_pinned_memory,use_mkldnn,initial_cpu_memory_in_mb"));
+  new_argv.push_back(strdup("--undefok=use_mkldnn,initial_cpu_memory_in_mb"));
 #endif
   int new_argc = static_cast<int>(new_argv.size());
   char** new_argv_address = new_argv.data();
diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py
index bd985ad733aa8eece2f8374d033f452a0175a011..5af5bc9c4731317075b3912a4749a0b358bdd56e 100644
--- a/python/paddle/fluid/__init__.py
+++ b/python/paddle/fluid/__init__.py
@@ -117,7 +117,7 @@ def __bootstrap__():
 
     read_env_flags = [
         'use_pinned_memory', 'check_nan_inf', 'benchmark', 'warpctc_dir',
-        'eager_delete_scope', 'use_mkldnn'
+        'eager_delete_scope', 'use_mkldnn', 'initial_cpu_memory_in_mb'
     ]
     if core.is_compiled_with_cuda():
         read_env_flags += [