Merge pull request #11525 from tensor-tang/refine/mem

refine the initial cpu memory flag for mkldnn

Merge pull request #11525 from tensor-tang/refine/mem
refine the initial cpu memory flag for mkldnn
e8f5757d · tensor-tang · GitHub · 69827f30 · 9a25f289 · e8f5757d
3 changed file
--- a/paddle/fluid/platform/cpu_info.cc
+++ b/paddle/fluid/platform/cpu_info.cc
@@ -28,9 +28,15 @@ DEFINE_double(fraction_of_cpu_memory_to_use, 1,
              "Default use 100% of CPU memory for PaddlePaddle,"
              "reserve the rest for page tables, etc");

-DEFINE_uint64(
-    initial_cpu_memory_in_mb, 500,
-    "Default initial 500MB of CPU memory for PaddlePaddle, in MD unit.");
+DEFINE_uint64(initial_cpu_memory_in_mb,
+#ifdef PADDLE_WITH_MKLDNN
+              /* Aligned with mozga-intel, MKLDNN need at least 5000 MB
+               * to obtain the best performance*/
+              5000,
+#else
+              500,
+#endif
+              "Initial CPU memory for PaddlePaddle, in MD unit.");

 DEFINE_double(
    fraction_of_cuda_pinned_memory_to_use, 0.5,
@@ -59,10 +65,7 @@ inline size_t CpuTotalPhysicalMemory() {
 size_t CpuMaxAllocSize() {
  // For distributed systems, it requires configuring and limiting
  // the fraction of memory to use.
-  return std::min(
-      static_cast<size_t>(FLAGS_fraction_of_cpu_memory_to_use *
-                          CpuTotalPhysicalMemory()),
-      static_cast<size_t>(FLAGS_initial_cpu_memory_in_mb * 1 << 20));
+  return FLAGS_fraction_of_cpu_memory_to_use * CpuTotalPhysicalMemory();
 }

 size_t CpuMinChunkSize() {
@@ -71,8 +74,11 @@ size_t CpuMinChunkSize() {
 }

 size_t CpuMaxChunkSize() {
-  // Allow to allocate the maximum chunk size is roughly 3% of CPU memory.
-  return CpuMaxAllocSize() / 32;
+  // Allow to allocate the maximum chunk size is roughly 3% of CPU memory,
+  // or the initial_cpu_memory_in_mb.
+  return std::min(
+      static_cast<size_t>(CpuMaxAllocSize() / 32),
+      static_cast<size_t>(FLAGS_initial_cpu_memory_in_mb * 1 << 20));
 }

 size_t CUDAPinnedMaxAllocSize() {

--- a/paddle/testing/paddle_gtest_main.cc
+++ b/paddle/testing/paddle_gtest_main.cc
@@ -30,8 +30,9 @@ int main(int argc, char** argv) {
  new_argv.push_back(
      strdup("--tryfromenv=fraction_of_gpu_memory_to_use,use_pinned_memory"));
 #else
-  new_argv.push_back(strdup("--tryfromenv=use_pinned_memory,use_mkldnn"));
-  new_argv.push_back(strdup("--undefok=use_mkldnn"));
+  new_argv.push_back(strdup(
+      "--tryfromenv=use_pinned_memory,use_mkldnn,initial_cpu_memory_in_mb"));
+  new_argv.push_back(strdup("--undefok=use_mkldnn,initial_cpu_memory_in_mb"));
 #endif
  int new_argc = static_cast<int>(new_argv.size());
  char** new_argv_address = new_argv.data();

--- a/python/paddle/fluid/__init__.py
+++ b/python/paddle/fluid/__init__.py
@@ -117,7 +117,7 @@ def __bootstrap__():

    read_env_flags = [
        'use_pinned_memory', 'check_nan_inf', 'benchmark', 'warpctc_dir',
-        'eager_delete_scope', 'use_mkldnn'
+        'eager_delete_scope', 'use_mkldnn', 'initial_cpu_memory_in_mb'
    ]
    if core.is_compiled_with_cuda():
        read_env_flags += [