Commit 6b0d1ec9 authored by TensorFlower Gardener

Merge pull request #22493 from Intel-tensorflow:cuixiaom_disable_MKL

PiperOrigin-RevId: 215560522
@@ -28,6 +28,7 @@ limitations under the License.
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/tracing.h"
 #include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/util/util.h"
 
 namespace tensorflow {
@@ -56,24 +57,26 @@ int32 NumInterOpThreadsFromSessionOptions(const SessionOptions& options) {
   const int32 inter_op = options.config.inter_op_parallelism_threads();
   if (inter_op != 0) return inter_op;
 #ifdef INTEL_MKL
-  // MKL library executes ops in parallel using OMP threads
-  // Set inter_op conservatively to avoid thread oversubscription that could
-  // lead to severe perf degradations and OMP resource exhaustion
-  int mkl_intra_op = 1;
+  if (!DisableMKL()) {
+    // MKL library executes ops in parallel using OMP threads
+    // Set inter_op conservatively to avoid thread oversubscription that could
+    // lead to severe perf degradations and OMP resource exhaustion
+    int mkl_intra_op = 1;
 #ifdef _OPENMP
-  mkl_intra_op = omp_get_max_threads();
+    mkl_intra_op = omp_get_max_threads();
 #endif  // _OPENMP
-  CHECK_GE(mkl_intra_op, 1);
-  const int32 mkl_inter_op = std::max(
-      (port::NumSchedulableCPUs() + mkl_intra_op - 1) / mkl_intra_op, 2);
-  VLOG(0) << "Creating new thread pool with default inter op setting: "
-          << mkl_inter_op
-          << ". Tune using inter_op_parallelism_threads for best performance.";
-  return mkl_inter_op;
-#else
+    DCHECK_GE(mkl_intra_op, 1);
+    const int32 mkl_inter_op = std::max(
+        (port::NumSchedulableCPUs() + mkl_intra_op - 1) / mkl_intra_op, 2);
+    VLOG(0)
+        << "Creating new thread pool with default inter op setting: "
+        << mkl_inter_op
+        << ". Tune using inter_op_parallelism_threads for best performance.";
+    return mkl_inter_op;
+  }
+#endif  // INTEL_MKL
   // Default to using the number of cores available in the process.
   return port::NumSchedulableCPUs();
-#endif  // INTEL_MKL
 }
 
 thread::ThreadPool* NewThreadPoolFromSessionOptions(
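For intuition, the MKL branch above sizes the inter-op pool by ceiling-dividing the number of schedulable CPUs by the per-op OMP thread count, with a floor of 2. A minimal standalone sketch of that arithmetic, using hypothetical numbers in place of port::NumSchedulableCPUs() and omp_get_max_threads():

// Standalone sketch, not TensorFlow code; the constants are hypothetical.
#include <algorithm>
#include <iostream>

int main() {
  const int num_cpus = 56;      // stand-in for port::NumSchedulableCPUs()
  const int mkl_intra_op = 14;  // stand-in for omp_get_max_threads()
  // Ceiling division: enough inter-op threads to cover all CPUs when each
  // op may fan out to mkl_intra_op OMP threads, but never fewer than 2.
  const int mkl_inter_op =
      std::max((num_cpus + mkl_intra_op - 1) / mkl_intra_op, 2);
  std::cout << mkl_inter_op << "\n";  // prints 4
  return 0;
}

On this hypothetical 56-CPU host with 14 OMP threads per op, four ops can run concurrently without oversubscribing the cores.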
@@ -29,6 +29,7 @@ limitations under the License.
 #include "tensorflow/core/platform/tracing.h"
 #include "tensorflow/core/platform/types.h"
 #include "tensorflow/core/public/session_options.h"
+#include "tensorflow/core/util/util.h"
 
 #ifdef INTEL_MKL
 #ifdef _OPENMP
@@ -49,6 +50,8 @@ ThreadPoolDevice::ThreadPoolDevice(const SessionOptions& options,
       allocator_(allocator),
       scoped_allocator_mgr_(new ScopedAllocatorMgr(name)) {
 #ifdef INTEL_MKL
+  // Early return when MKL is disabled
+  if (DisableMKL()) return;
 #ifdef _OPENMP
   const char* user_omp_threads = getenv("OMP_NUM_THREADS");
   if (user_omp_threads == nullptr) {
@@ -114,7 +117,8 @@ class MklCPUAllocatorFactory : public AllocatorFactory {
 };
 
 #ifdef ENABLE_MKL
-REGISTER_MEM_ALLOCATOR("MklCPUAllocator", 200, MklCPUAllocatorFactory);
+REGISTER_MEM_ALLOCATOR("MklCPUAllocator", (DisableMKL() ? 50 : 200),
+                       MklCPUAllocatorFactory);
 #endif  // ENABLE_MKL
 
 }  // namespace
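Design note on the priority change: REGISTER_MEM_ALLOCATOR selects the highest-priority registered factory, and the default CPU allocator registers at priority 100 in this era of the codebase (to the best of my reading of the allocator registry). Keeping the MKL factory registered but dropping it from 200 to 50 when MKL is disabled therefore lets the plain CPU allocator win the selection, with no separate unregistration path needed.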
@@ -38,6 +38,7 @@ limitations under the License.
 #include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/util/tensor_format.h"
+#include "tensorflow/core/util/util.h"
 
 #include "tensorflow/core/graph/mkl_graph_util.h"
 #include "tensorflow/core/graph/mkl_layout_pass.h"
@@ -4511,6 +4512,10 @@ Status MklLayoutRewritePass::Run(const GraphOptimizationPassOptions& options) {
   if (options.graph == nullptr && options.partition_graphs == nullptr) {
     return Status::OK();
   }
+  if (DisableMKL()) {
+    VLOG(2) << "TF-MKL: Disabling MKL";
+    return Status::OK();
+  }
 
   auto process_graph = [&](std::unique_ptr<Graph>* g) {
     // Get the ownership of a graph
@@ -31,6 +31,7 @@ limitations under the License.
 #include "tensorflow/core/lib/gtl/map_util.h"
 #include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/util/util.h"
 
 #include "tensorflow/core/graph/mkl_graph_util.h"
 #include "tensorflow/core/graph/mkl_tfconversion_pass.h"
@@ -424,6 +425,10 @@ Status MklToTfConversionPass::Run(const GraphOptimizationPassOptions& options) {
   if (options.graph == nullptr && options.partition_graphs == nullptr) {
     return Status::OK();
   }
+  if (DisableMKL()) {
+    VLOG(2) << "TF-MKL: Disabling MKL";
+    return Status::OK();
+  }
 
   auto process_graph = [&](std::unique_ptr<Graph>* g) {
     // Get the ownership of graph
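Together with the identical early return added to MklLayoutRewritePass above, both MKL graph passes become no-ops when MKL is disabled: no ops are rewritten to their _Mkl variants and no Mkl-to-TF conversion nodes are inserted, so the graph runs on standard TensorFlow kernels.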
@@ -120,4 +120,20 @@ string SliceDebugString(const TensorShape& shape, const int64 flat) {
   return result;
 }
 
+#ifdef INTEL_MKL
+bool DisableMKL() {
+  enum MklStatus { MKL_DEFAULT = 0, MKL_ON = 1, MKL_OFF = 2 };
+  static MklStatus status = MKL_DEFAULT;
+  if (status == MKL_DEFAULT) {
+    char* tf_disable_mkl = getenv("TF_DISABLE_MKL");
+    if ((tf_disable_mkl != NULL) && (std::stoi(tf_disable_mkl) == 1)) {
+      VLOG(2) << "TF-MKL: Disabling MKL";
+      status = MKL_OFF;
+    } else {
+      status = MKL_ON;
+    }
+  }
+  return status == MKL_OFF ? true : false;
+}
+#endif  // INTEL_MKL
 }  // namespace tensorflow
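The patch reads TF_DISABLE_MKL once and latches the answer in a function-local static. A self-contained sketch of that behavior (illustrative only: the name DisableMKLSketch and the main() driver are not TensorFlow code, and std::atoi stands in for the patch's std::stoi to keep the sketch exception-free):

// Standalone sketch of the latching behavior of DisableMKL().
#include <cstdlib>
#include <iostream>

static bool DisableMKLSketch() {
  enum MklStatus { MKL_DEFAULT = 0, MKL_ON = 1, MKL_OFF = 2 };
  static MklStatus status = MKL_DEFAULT;
  // The environment is consulted only on the first call; the result is
  // cached in the function-local static for the rest of the process.
  if (status == MKL_DEFAULT) {
    const char* tf_disable_mkl = std::getenv("TF_DISABLE_MKL");
    status = (tf_disable_mkl != nullptr && std::atoi(tf_disable_mkl) == 1)
                 ? MKL_OFF
                 : MKL_ON;
  }
  return status == MKL_OFF;
}

int main() {
  // POSIX setenv/unsetenv; on Windows use _putenv_s instead.
  setenv("TF_DISABLE_MKL", "1", /*overwrite=*/1);
  std::cout << DisableMKLSketch() << "\n";  // 1: MKL disabled
  unsetenv("TF_DISABLE_MKL");
  std::cout << DisableMKLSketch() << "\n";  // still 1: the value was latched
  return 0;
}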
@@ -56,6 +56,11 @@ string PrintMemory(const char* ptr, size_t n);
 // "tensor", "tensor[i]", "tensor[i, j]", etc.
 string SliceDebugString(const TensorShape& shape, const int64 flat);
 
+// disable MKL in runtime
+#ifdef INTEL_MKL
+bool DisableMKL();
+#endif  // INTEL_MKL
+
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_CORE_UTIL_UTIL_H_
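A practical consequence of the latch: TF_DISABLE_MKL=1 must be present in the environment before the process first reaches any of the call sites above (thread-pool sizing, allocator registration, or the graph passes); changing the variable later in the process lifetime has no effect.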