未验证 提交 e8f5757d 编写于 作者: T tensor-tang 提交者: GitHub

Merge pull request #11525 from tensor-tang/refine/mem

refine the initial cpu memory flag for mkldnn
...@@ -28,9 +28,15 @@ DEFINE_double(fraction_of_cpu_memory_to_use, 1, ...@@ -28,9 +28,15 @@ DEFINE_double(fraction_of_cpu_memory_to_use, 1,
"Default use 100% of CPU memory for PaddlePaddle," "Default use 100% of CPU memory for PaddlePaddle,"
"reserve the rest for page tables, etc"); "reserve the rest for page tables, etc");
// Initial CPU memory budget (in megabytes) for PaddlePaddle's allocator.
// MKLDNN builds default to a much larger value because its primitives need a
// bigger arena up front to reach peak performance.
DEFINE_uint64(initial_cpu_memory_in_mb,
#ifdef PADDLE_WITH_MKLDNN
              /* Aligned with mozga-intel: MKLDNN needs at least 5000 MB
               * to obtain the best performance. */
              5000,
#else
              500,
#endif
              "Initial CPU memory for PaddlePaddle, in MB unit.");
DEFINE_double( DEFINE_double(
fraction_of_cuda_pinned_memory_to_use, 0.5, fraction_of_cuda_pinned_memory_to_use, 0.5,
...@@ -59,10 +65,7 @@ inline size_t CpuTotalPhysicalMemory() { ...@@ -59,10 +65,7 @@ inline size_t CpuTotalPhysicalMemory() {
size_t CpuMaxAllocSize() { size_t CpuMaxAllocSize() {
// For distributed systems, it requires configuring and limiting // For distributed systems, it requires configuring and limiting
// the fraction of memory to use. // the fraction of memory to use.
return std::min( return FLAGS_fraction_of_cpu_memory_to_use * CpuTotalPhysicalMemory();
static_cast<size_t>(FLAGS_fraction_of_cpu_memory_to_use *
CpuTotalPhysicalMemory()),
static_cast<size_t>(FLAGS_initial_cpu_memory_in_mb * 1 << 20));
} }
size_t CpuMinChunkSize() { size_t CpuMinChunkSize() {
...@@ -71,8 +74,11 @@ size_t CpuMinChunkSize() { ...@@ -71,8 +74,11 @@ size_t CpuMinChunkSize() {
} }
size_t CpuMaxChunkSize() { size_t CpuMaxChunkSize() {
// Allow to allocate the maximum chunk size is roughly 3% of CPU memory. // Allow to allocate the maximum chunk size is roughly 3% of CPU memory,
return CpuMaxAllocSize() / 32; // or the initial_cpu_memory_in_mb.
return std::min(
static_cast<size_t>(CpuMaxAllocSize() / 32),
static_cast<size_t>(FLAGS_initial_cpu_memory_in_mb * 1 << 20));
} }
size_t CUDAPinnedMaxAllocSize() { size_t CUDAPinnedMaxAllocSize() {
......
...@@ -30,8 +30,9 @@ int main(int argc, char** argv) { ...@@ -30,8 +30,9 @@ int main(int argc, char** argv) {
new_argv.push_back( new_argv.push_back(
strdup("--tryfromenv=fraction_of_gpu_memory_to_use,use_pinned_memory")); strdup("--tryfromenv=fraction_of_gpu_memory_to_use,use_pinned_memory"));
#else #else
new_argv.push_back(strdup("--tryfromenv=use_pinned_memory,use_mkldnn")); new_argv.push_back(strdup(
new_argv.push_back(strdup("--undefok=use_mkldnn")); "--tryfromenv=use_pinned_memory,use_mkldnn,initial_cpu_memory_in_mb"));
new_argv.push_back(strdup("--undefok=use_mkldnn,initial_cpu_memory_in_mb"));
#endif #endif
int new_argc = static_cast<int>(new_argv.size()); int new_argc = static_cast<int>(new_argv.size());
char** new_argv_address = new_argv.data(); char** new_argv_address = new_argv.data();
......
...@@ -117,7 +117,7 @@ def __bootstrap__(): ...@@ -117,7 +117,7 @@ def __bootstrap__():
read_env_flags = [ read_env_flags = [
'use_pinned_memory', 'check_nan_inf', 'benchmark', 'warpctc_dir', 'use_pinned_memory', 'check_nan_inf', 'benchmark', 'warpctc_dir',
'eager_delete_scope', 'use_mkldnn' 'eager_delete_scope', 'use_mkldnn', 'initial_cpu_memory_in_mb'
] ]
if core.is_compiled_with_cuda(): if core.is_compiled_with_cuda():
read_env_flags += [ read_env_flags += [
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册