diff --git a/tensorflow/core/common_runtime/process_util.cc b/tensorflow/core/common_runtime/process_util.cc
index a5d31b75c75b8f0608f01f07c263f6343e5735cc..e1dc08d64545ece29a8aa2ab2612dd3cd994559e 100644
--- a/tensorflow/core/common_runtime/process_util.cc
+++ b/tensorflow/core/common_runtime/process_util.cc
@@ -28,6 +28,7 @@ limitations under the License.
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/tracing.h"
 #include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/util/util.h"
 
 namespace tensorflow {
 
@@ -56,24 +57,26 @@ int32 NumInterOpThreadsFromSessionOptions(const SessionOptions& options) {
   const int32 inter_op = options.config.inter_op_parallelism_threads();
   if (inter_op != 0) return inter_op;
 #ifdef INTEL_MKL
-  // MKL library executes ops in parallel using OMP threads
-  // Set inter_op conservatively to avoid thread oversubscription that could
-  // lead to severe perf degradations and OMP resource exhaustion
-  int mkl_intra_op = 1;
+  if (!DisableMKL()) {
+    // MKL library executes ops in parallel using OMP threads
+    // Set inter_op conservatively to avoid thread oversubscription that could
+    // lead to severe perf degradations and OMP resource exhaustion
+    int mkl_intra_op = 1;
 #ifdef _OPENMP
-  mkl_intra_op = omp_get_max_threads();
+    mkl_intra_op = omp_get_max_threads();
 #endif  // _OPENMP
-  CHECK_GE(mkl_intra_op, 1);
-  const int32 mkl_inter_op = std::max(
-      (port::NumSchedulableCPUs() + mkl_intra_op - 1) / mkl_intra_op, 2);
-  VLOG(0) << "Creating new thread pool with default inter op setting: "
-          << mkl_inter_op
-          << ". Tune using inter_op_parallelism_threads for best performance.";
-  return mkl_inter_op;
-#else
+    DCHECK_GE(mkl_intra_op, 1);
+    const int32 mkl_inter_op = std::max(
+        (port::NumSchedulableCPUs() + mkl_intra_op - 1) / mkl_intra_op, 2);
+    VLOG(0)
+        << "Creating new thread pool with default inter op setting: "
+        << mkl_inter_op
+        << ". Tune using inter_op_parallelism_threads for best performance.";
+    return mkl_inter_op;
+  }
+#endif  // INTEL_MKL
   // Default to using the number of cores available in the process.
   return port::NumSchedulableCPUs();
-#endif  // INTEL_MKL
 }
 
 thread::ThreadPool* NewThreadPoolFromSessionOptions(
diff --git a/tensorflow/core/common_runtime/threadpool_device.cc b/tensorflow/core/common_runtime/threadpool_device.cc
index 8587d1783acdefa35ae111a1440a29e896e8ae9c..6404d8bc6a209997afbe33c547679ebb2cb5cbf5 100644
--- a/tensorflow/core/common_runtime/threadpool_device.cc
+++ b/tensorflow/core/common_runtime/threadpool_device.cc
@@ -29,6 +29,7 @@ limitations under the License.
 #include "tensorflow/core/platform/tracing.h"
 #include "tensorflow/core/platform/types.h"
 #include "tensorflow/core/public/session_options.h"
+#include "tensorflow/core/util/util.h"
 
 #ifdef INTEL_MKL
 #ifdef _OPENMP
@@ -49,6 +50,8 @@ ThreadPoolDevice::ThreadPoolDevice(const SessionOptions& options,
       allocator_(allocator),
       scoped_allocator_mgr_(new ScopedAllocatorMgr(name)) {
 #ifdef INTEL_MKL
+  // Early return when MKL is disabled
+  if (DisableMKL()) return;
 #ifdef _OPENMP
   const char* user_omp_threads = getenv("OMP_NUM_THREADS");
   if (user_omp_threads == nullptr) {
@@ -114,7 +117,8 @@ class MklCPUAllocatorFactory : public AllocatorFactory {
 };
 
 #ifdef ENABLE_MKL
-REGISTER_MEM_ALLOCATOR("MklCPUAllocator", 200, MklCPUAllocatorFactory);
+REGISTER_MEM_ALLOCATOR("MklCPUAllocator", (DisableMKL() ? 50 : 200),
+                       MklCPUAllocatorFactory);
 #endif  // ENABLE_MKL
 
 }  // namespace
diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc
index 06d3fefef1edf96a1de0914d46487ccd85df615d..7394b1cddfbc56c758e7853dea548429d8c25608 100644
--- a/tensorflow/core/graph/mkl_layout_pass.cc
+++ b/tensorflow/core/graph/mkl_layout_pass.cc
@@ -38,6 +38,7 @@ limitations under the License.
 #include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/util/tensor_format.h"
+#include "tensorflow/core/util/util.h"
 
 #include "tensorflow/core/graph/mkl_graph_util.h"
 #include "tensorflow/core/graph/mkl_layout_pass.h"
@@ -4511,6 +4512,10 @@ Status MklLayoutRewritePass::Run(const GraphOptimizationPassOptions& options) {
   if (options.graph == nullptr && options.partition_graphs == nullptr) {
     return Status::OK();
   }
+  if (DisableMKL()) {
+    VLOG(2) << "TF-MKL: Disabling MKL";
+    return Status::OK();
+  }
 
   auto process_graph = [&](std::unique_ptr<Graph>* g) {
     // Get the ownership of a graph
diff --git a/tensorflow/core/graph/mkl_tfconversion_pass.cc b/tensorflow/core/graph/mkl_tfconversion_pass.cc
index 8c5ffd71a3215761819b5ef32a417c3eda576655..6804ab84ce3260fa1cbb0b23cc2dff90baed8855 100644
--- a/tensorflow/core/graph/mkl_tfconversion_pass.cc
+++ b/tensorflow/core/graph/mkl_tfconversion_pass.cc
@@ -31,6 +31,7 @@ limitations under the License.
 #include "tensorflow/core/lib/gtl/map_util.h"
 #include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/util/util.h"
 
 #include "tensorflow/core/graph/mkl_graph_util.h"
 #include "tensorflow/core/graph/mkl_tfconversion_pass.h"
@@ -424,6 +425,10 @@ Status MklToTfConversionPass::Run(const GraphOptimizationPassOptions& options) {
   if (options.graph == nullptr && options.partition_graphs == nullptr) {
     return Status::OK();
   }
+  if (DisableMKL()) {
+    VLOG(2) << "TF-MKL: Disabling MKL";
+    return Status::OK();
+  }
 
   auto process_graph = [&](std::unique_ptr<Graph>* g) {
     // Get the ownership of graph
diff --git a/tensorflow/core/util/util.cc b/tensorflow/core/util/util.cc
index 1e5a9c571264ffe62951fe51b01aa80fea7c2728..489999d1e859bec049c96d8114e291c9a8d1eb4a 100644
--- a/tensorflow/core/util/util.cc
+++ b/tensorflow/core/util/util.cc
@@ -120,4 +120,29 @@ string SliceDebugString(const TensorShape& shape, const int64 flat) {
   return result;
 }
 
+#ifdef INTEL_MKL
+// Returns true when MKL has been switched off at runtime by setting the
+// environment variable TF_DISABLE_MKL to 1. The environment is consulted only
+// on the first call; the answer is cached for the rest of the process.
+bool DisableMKL() {
+  // The lambda-initialized local static runs exactly once, and C++11 makes
+  // that initialization safe when several threads make the first call
+  // concurrently.
+  static const bool disabled = [] {
+    const char* tf_disable_mkl = getenv("TF_DISABLE_MKL");
+    if (tf_disable_mkl == nullptr) return false;
+    // strtol reports unparsable input through `end` instead of throwing, so a
+    // value such as "true" or "" leaves MKL enabled rather than terminating
+    // the process the way an uncaught std::stoi exception would.
+    char* end = nullptr;
+    const long value = strtol(tf_disable_mkl, &end, 10);
+    const bool off = (end != tf_disable_mkl) && (value == 1);
+    if (off) {
+      VLOG(2) << "TF-MKL: Disabling MKL";
+    }
+    return off;
+  }();
+  return disabled;
+}
+#endif  // INTEL_MKL
 }  // namespace tensorflow
diff --git a/tensorflow/core/util/util.h b/tensorflow/core/util/util.h
index 93dfd51ab5afccad5f42b79c4f03767045e20591..4aa47aa48a26f684b31564cc841d6563bd5dfda2 100644
--- a/tensorflow/core/util/util.h
+++ b/tensorflow/core/util/util.h
@@ -56,6 +56,12 @@ string PrintMemory(const char* ptr, size_t n);
 // "tensor", "tensor[i]", "tensor[i, j]", etc.
 string SliceDebugString(const TensorShape& shape, const int64 flat);
 
+// Returns true if MKL has been disabled at runtime by setting the
+// environment variable TF_DISABLE_MKL to 1.
+#ifdef INTEL_MKL
+bool DisableMKL();
+#endif  // INTEL_MKL
+
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_CORE_UTIL_UTIL_H_