diff --git a/tensorflow/core/common_runtime/process_util.cc b/tensorflow/core/common_runtime/process_util.cc
index a5d31b75c75b8f0608f01f07c263f6343e5735cc..e1dc08d64545ece29a8aa2ab2612dd3cd994559e 100644
--- a/tensorflow/core/common_runtime/process_util.cc
+++ b/tensorflow/core/common_runtime/process_util.cc
@@ -28,6 +28,7 @@ limitations under the License.
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/tracing.h"
 #include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/util/util.h"
 
 namespace tensorflow {
 
@@ -56,24 +57,26 @@ int32 NumInterOpThreadsFromSessionOptions(const SessionOptions& options) {
   const int32 inter_op = options.config.inter_op_parallelism_threads();
   if (inter_op != 0) return inter_op;
 #ifdef INTEL_MKL
-  // MKL library executes ops in parallel using OMP threads
-  // Set inter_op conservatively to avoid thread oversubscription that could
-  // lead to severe perf degradations and OMP resource exhaustion
-  int mkl_intra_op = 1;
+  if (!DisableMKL()) {
+    // MKL library executes ops in parallel using OMP threads
+    // Set inter_op conservatively to avoid thread oversubscription that could
+    // lead to severe perf degradations and OMP resource exhaustion
+    int mkl_intra_op = 1;
 #ifdef _OPENMP
-  mkl_intra_op = omp_get_max_threads();
+    mkl_intra_op = omp_get_max_threads();
 #endif  // _OPENMP
-  CHECK_GE(mkl_intra_op, 1);
-  const int32 mkl_inter_op = std::max(
-      (port::NumSchedulableCPUs() + mkl_intra_op - 1) / mkl_intra_op, 2);
-  VLOG(0) << "Creating new thread pool with default inter op setting: "
-          << mkl_inter_op
-          << ". Tune using inter_op_parallelism_threads for best performance.";
-  return mkl_inter_op;
-#else
+    DCHECK_GE(mkl_intra_op, 1);
+    const int32 mkl_inter_op = std::max(
+        (port::NumSchedulableCPUs() + mkl_intra_op - 1) / mkl_intra_op, 2);
+    VLOG(0)
+        << "Creating new thread pool with default inter op setting: "
+        << mkl_inter_op
+        << ". Tune using inter_op_parallelism_threads for best performance.";
+    return mkl_inter_op;
+  }
+#endif  // INTEL_MKL
   // Default to using the number of cores available in the process.
   return port::NumSchedulableCPUs();
-#endif  // INTEL_MKL
 }
 
 thread::ThreadPool* NewThreadPoolFromSessionOptions(
diff --git a/tensorflow/core/common_runtime/threadpool_device.cc b/tensorflow/core/common_runtime/threadpool_device.cc
index 8587d1783acdefa35ae111a1440a29e896e8ae9c..6404d8bc6a209997afbe33c547679ebb2cb5cbf5 100644
--- a/tensorflow/core/common_runtime/threadpool_device.cc
+++ b/tensorflow/core/common_runtime/threadpool_device.cc
@@ -29,6 +29,7 @@ limitations under the License.
 #include "tensorflow/core/platform/tracing.h"
 #include "tensorflow/core/platform/types.h"
 #include "tensorflow/core/public/session_options.h"
+#include "tensorflow/core/util/util.h"
 
 #ifdef INTEL_MKL
 #ifdef _OPENMP
@@ -49,6 +50,8 @@ ThreadPoolDevice::ThreadPoolDevice(const SessionOptions& options,
       allocator_(allocator),
       scoped_allocator_mgr_(new ScopedAllocatorMgr(name)) {
 #ifdef INTEL_MKL
+  // Early return when MKL is disabled
+  if (DisableMKL()) return;
 #ifdef _OPENMP
   const char* user_omp_threads = getenv("OMP_NUM_THREADS");
   if (user_omp_threads == nullptr) {
@@ -114,7 +117,8 @@ class MklCPUAllocatorFactory : public AllocatorFactory {
 };
 
 #ifdef ENABLE_MKL
-REGISTER_MEM_ALLOCATOR("MklCPUAllocator", 200, MklCPUAllocatorFactory);
+REGISTER_MEM_ALLOCATOR("MklCPUAllocator", (DisableMKL() ? 50 : 200),
+                       MklCPUAllocatorFactory);
 #endif  // ENABLE_MKL
 
 }  // namespace
diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc
index 06d3fefef1edf96a1de0914d46487ccd85df615d..7394b1cddfbc56c758e7853dea548429d8c25608 100644
--- a/tensorflow/core/graph/mkl_layout_pass.cc
+++ b/tensorflow/core/graph/mkl_layout_pass.cc
@@ -38,6 +38,7 @@ limitations under the License.
 #include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/util/tensor_format.h"
+#include "tensorflow/core/util/util.h"
 
 #include "tensorflow/core/graph/mkl_graph_util.h"
 #include "tensorflow/core/graph/mkl_layout_pass.h"
@@ -4511,6 +4512,10 @@ Status MklLayoutRewritePass::Run(const GraphOptimizationPassOptions& options) {
   if (options.graph == nullptr && options.partition_graphs == nullptr) {
     return Status::OK();
   }
+  if (DisableMKL()) {
+    VLOG(2) << "TF-MKL: Disabling MKL";
+    return Status::OK();
+  }
 
   auto process_graph = [&](std::unique_ptr<Graph>* g) {
     // Get the ownership of a graph
diff --git a/tensorflow/core/graph/mkl_tfconversion_pass.cc b/tensorflow/core/graph/mkl_tfconversion_pass.cc
index 8c5ffd71a3215761819b5ef32a417c3eda576655..6804ab84ce3260fa1cbb0b23cc2dff90baed8855 100644
--- a/tensorflow/core/graph/mkl_tfconversion_pass.cc
+++ b/tensorflow/core/graph/mkl_tfconversion_pass.cc
@@ -31,6 +31,7 @@ limitations under the License.
 #include "tensorflow/core/lib/gtl/map_util.h"
 #include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/util/util.h"
 
 #include "tensorflow/core/graph/mkl_graph_util.h"
 #include "tensorflow/core/graph/mkl_tfconversion_pass.h"
@@ -424,6 +425,10 @@ Status MklToTfConversionPass::Run(const GraphOptimizationPassOptions& options) {
   if (options.graph == nullptr && options.partition_graphs == nullptr) {
     return Status::OK();
   }
+  if (DisableMKL()) {
+    VLOG(2) << "TF-MKL: Disabling MKL";
+    return Status::OK();
+  }
 
   auto process_graph = [&](std::unique_ptr<Graph>* g) {
     // Get the ownership of graph
diff --git a/tensorflow/core/util/util.cc b/tensorflow/core/util/util.cc
index 1e5a9c571264ffe62951fe51b01aa80fea7c2728..489999d1e859bec049c96d8114e291c9a8d1eb4a 100644
--- a/tensorflow/core/util/util.cc
+++ b/tensorflow/core/util/util.cc
@@ -120,4 +120,20 @@ string SliceDebugString(const TensorShape& shape, const int64 flat) {
   return result;
 }
 
+#ifdef INTEL_MKL
+// Returns true iff TF_DISABLE_MKL parses as the integer 1. The variable is
+// read once on first call; every later call returns the cached result.
+bool DisableMKL() {
+  // Initialization of a function-local static runs once, even under threads.
+  static const bool disabled = [] {
+    const char* tf_disable_mkl = getenv("TF_DISABLE_MKL");
+    if (tf_disable_mkl == nullptr) return false;
+    // strtol returns 0 for non-numeric text, where std::stoi would throw.
+    if (strtol(tf_disable_mkl, nullptr, 10) != 1) return false;
+    VLOG(2) << "TF-MKL: Disabling MKL";
+    return true;
+  }();
+  return disabled;
+}
+#endif  // INTEL_MKL
 }  // namespace tensorflow
diff --git a/tensorflow/core/util/util.h b/tensorflow/core/util/util.h
index 93dfd51ab5afccad5f42b79c4f03767045e20591..4aa47aa48a26f684b31564cc841d6563bd5dfda2 100644
--- a/tensorflow/core/util/util.h
+++ b/tensorflow/core/util/util.h
@@ -56,6 +56,11 @@ string PrintMemory(const char* ptr, size_t n);
 // "tensor", "tensor[i]", "tensor[i, j]", etc.
 string SliceDebugString(const TensorShape& shape, const int64 flat);
 
+// Returns true if the TF_DISABLE_MKL environment variable is set to 1.
+#ifdef INTEL_MKL
+bool DisableMKL();
+#endif  // INTEL_MKL
+
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_CORE_UTIL_UTIL_H_