Merge pull request #7164 from tensor-tang/context

Add MKLDNNDeviceContext

Merge pull request #7164 from tensor-tang/context
Add MKLDNNDeviceContext
673dc26c · tensor-tang · GitHub · 894236a1 · 6177cb51 · 673dc26c
7 changed files
--- a/cmake/external/mkldnn.cmake
+++ b/cmake/external/mkldnn.cmake
@@ -63,9 +63,17 @@ ExternalProject_Add(
                        -DMKLROOT:PATH=${MKLML_ROOT}
 )
-ADD_LIBRARY(mkldnn SHARED IMPORTED GLOBAL)
+ADD_LIBRARY(shared_mkldnn SHARED IMPORTED GLOBAL)
-SET_PROPERTY(TARGET mkldnn PROPERTY IMPORTED_LOCATION ${MKLDNN_LIB})
+SET_PROPERTY(TARGET shared_mkldnn PROPERTY IMPORTED_LOCATION ${MKLDNN_LIB})
-ADD_DEPENDENCIES(mkldnn ${MKLDNN_PROJECT})
+ADD_DEPENDENCIES(shared_mkldnn ${MKLDNN_PROJECT})
 MESSAGE(STATUS "MKLDNN library: ${MKLDNN_LIB}")
 add_definitions(-DPADDLE_WITH_MKLDNN)
-LIST(APPEND external_project_dependencies mkldnn)
+LIST(APPEND external_project_dependencies shared_mkldnn)
+# Generate a static dummy target named `mkldnn` to track mkldnn dependencies,
+# so that cc_library(xxx SRCS xxx.c DEPS mkldnn) can list it under DEPS.
+# The dummy source only defines one string constant holding its own path.
+SET(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/mkldnn_dummy.c)
+FILE(WRITE ${dummyfile} "const char * dummy = \"${dummyfile}\";")
+ADD_LIBRARY(mkldnn STATIC ${dummyfile})
+# Attach the MKLDNN and MKLML libraries to the dummy target.
+TARGET_LINK_LIBRARIES(mkldnn ${MKLDNN_LIB} ${MKLML_LIB} ${MKLML_IOMP_LIB})
+# The dummy target depends on ${MKLDNN_PROJECT}, like shared_mkldnn does.
+ADD_DEPENDENCIES(mkldnn ${MKLDNN_PROJECT})
--- a/paddle/framework/CMakeLists.txt
+++ b/paddle/framework/CMakeLists.txt
@@ -41,7 +41,7 @@ device_context)
 cc_library(op_proto_maker SRCS op_proto_maker.cc DEPS framework_proto attribute)
 cc_test(op_proto_maker_test SRCS op_proto_maker_test.cc DEPS op_proto_maker)
 cc_library(op_info SRCS op_info.cc DEPS attribute framework_proto)
-cc_library(shape_inference SRCS shape_inference.cc DEPS ddim attribute)
+cc_library(shape_inference SRCS shape_inference.cc DEPS ddim attribute device_context)
 cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog
    shape_inference data_transform)
 cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry init)

--- a/paddle/operators/tensor.save
+++ b/paddle/operators/tensor.save
--- a/paddle/platform/CMakeLists.txt
+++ b/paddle/platform/CMakeLists.txt
@@ -21,10 +21,16 @@ ELSE()
    set(GPU_CTX_DEPS)
 ENDIF()
+# Extra dependency of device_context: `mkldnn` when WITH_MKLDNN is set,
+# empty otherwise.
+IF(WITH_MKLDNN)
+    set(MKLDNN_CTX_DEPS mkldnn)
+ELSE()
+    set(MKLDNN_CTX_DEPS)
+ENDIF()
 # memcpy deoends on device_context, here add deps individually for
 # avoiding cycle dependencies
 cc_library(device_context SRCS device_context.cc DEPS memory buddy_allocator
-    system_allocator memory_block meta_data meta_cache place eigen3 ${GPU_CTX_DEPS})
+    system_allocator memory_block meta_data meta_cache place eigen3 ${GPU_CTX_DEPS} ${MKLDNN_CTX_DEPS})
 nv_test(device_context_test SRCS device_context_test.cu DEPS device_context gpu_info)
 nv_test(cudnn_helper_test SRCS cudnn_helper_test.cc DEPS dynload_cuda)

--- a/paddle/platform/device_context.cc
+++ b/paddle/platform/device_context.cc
@@ -168,5 +168,69 @@ cudaStream_t CUDADeviceContext::stream() const { return stream_; }
 #endif
+#ifdef PADDLE_WITH_MKLDNN
+// Creates the context for `place` with an eager stream and CPU engine 0.
+// ready_ starts out false, so the first Execute() recreates the stream.
+MKLDNNDeviceContext::MKLDNNDeviceContext(CPUPlace place)
+    : CPUDeviceContext(place),
+      stream_(new mkldnn::stream(mkldnn::stream::kind::eager)),
+      engine_(new mkldnn::engine(mkldnn::engine::cpu, 0)),
+      ready_(false) {}
+/* \brief  Register `value` under `op_key` in the pool selected by T, unless
+ * GetElement<T>(op_key) already yields a non-null element. */
+template <typename T>
+void MKLDNNDeviceContext::AddElement(const std::string& op_key,
+                                     const T& value) {
+  // Keep the first registration: an existing non-null element wins.
+  if (GetElement<T>(op_key)) {
+    return;
+  }
+  // NOTE(review): `value` is a const reference, so std::move yields a const
+  // rvalue and cannot actually move from it. GetElementPool<T>() is declared
+  // to return a const map reference, so emplace() looks ill-formed once this
+  // template is instantiated -- confirm, and fix together with the
+  // declarations in device_context.h.
+  GetElementPool<T>().emplace(op_key, std::move(value));
+}
+/* \brief  Look up an existing element: memory, primitive or primitive desc.
+ *
+ * \param op_key  key the element was registered under
+ * \return reference to the stored element, or to an empty (null) element
+ *         when `op_key` is not present in the pool selected by T
+ */
+template <typename T>
+const T& MKLDNNDeviceContext::GetElement(const std::string& op_key) const {
+  // A reference is returned, so a miss must refer to an object that outlives
+  // this call. `cond ? nullptr : it->second` would build a temporary T (which
+  // also requires copying the stored element) and hand back a dangling
+  // reference to it; with two lvalues of type const T no temporary is made.
+  static const T kNullElement{};
+  const auto& pool = GetElementPool<T>();
+  const auto it = pool.find(op_key);
+  return it == pool.end() ? kNullElement : it->second;
+}
+// Pool lookup for MKLDNNMemoryPtr elements: exposes memory_pool_ read-only.
+template <>
+const std::unordered_map<const std::string, const MKLDNNMemoryPtr,
+                         std::hash<std::string>>&
+MKLDNNDeviceContext::GetElementPool<MKLDNNMemoryPtr>() const {
+  return memory_pool_;
+}
+// Pool lookup for MKLDNNPrimitivePtr elements: exposes primitive_pool_
+// read-only.
+template <>
+const std::unordered_map<const std::string, const MKLDNNPrimitivePtr,
+                         std::hash<std::string>>&
+MKLDNNDeviceContext::GetElementPool<MKLDNNPrimitivePtr>() const {
+  return primitive_pool_;
+}
+// Pool lookup for MKLDNNPrimitiveDescPtr elements: exposes
+// primitive_desc_pool_ read-only.
+template <>
+const std::unordered_map<const std::string, const MKLDNNPrimitiveDescPtr,
+                         std::hash<std::string>>&
+MKLDNNDeviceContext::GetElementPool<MKLDNNPrimitiveDescPtr>() const {
+  return primitive_desc_pool_;
+}
+// Submits the queued pipeline to the stream and waits on it with `block`,
+// then empties the queue. Does nothing when no primitive is queued.
+void MKLDNNDeviceContext::Execute(bool block) {
+  if (!pipeline_.empty()) {
+    ResetStream();
+    stream_->submit(pipeline_).wait(block);
+    // Flag the stream as used so the next Execute() recreates it.
+    ready_ = false;
+    pipeline_.clear();
+  }
+}
+// Replaces stream_ with a fresh eager stream unless it is already marked
+// ready, and marks it ready afterwards.
+void MKLDNNDeviceContext::ResetStream() {
+  if (!ready_) {
+    // TODO(TJ): change me when mkldnn have specific method to reset this state
+    stream_.reset(new mkldnn::stream(mkldnn::stream::kind::eager));
+    ready_ = true;
+  }
+}
+#endif
 }  // namespace platform
 }  // namespace paddle
--- a/paddle/platform/device_context.h
+++ b/paddle/platform/device_context.h
@@ -21,6 +21,10 @@ limitations under the License. */
 #define EIGEN_USE_GPU
 #endif
+#ifdef PADDLE_WITH_MKLDNN
+#include "paddle/platform/mkldnn_helper.h"
+#endif
 #include "paddle/platform/enforce.h"
 #include "paddle/platform/place.h"
 #include "unsupported/Eigen/CXX11/Tensor"
@@ -105,6 +109,54 @@ struct DefaultDeviceContextType<platform::CUDAPlace> {
 #endif
+#ifdef PADDLE_WITH_MKLDNN
+/*! \brief  CPU device context that additionally owns an MKLDNN engine and
+ * stream, keyed pools of memories / primitives / primitive descs, and a
+ * pipeline of primitives waiting to be executed. */
+class MKLDNNDeviceContext : public CPUDeviceContext {
+ public:
+  explicit MKLDNNDeviceContext(CPUPlace place);
+  /*! \brief  Add new element: memory, primitive or primitive desc */
+  template <typename T>
+  void AddElement(const std::string& op_key, const T& value);
+  /*! \brief  Get existing element: memory, primitive or primitive desc */
+  template <typename T>
+  const T& GetElement(const std::string& op_key) const;
+  /*! \brief  Get element pool: memory, primitive or primitive desc pool */
+  template <typename T>
+  const std::unordered_map<const std::string, const T, std::hash<std::string>>&
+  GetElementPool() const;
+  /*! \brief  Get the active engine */
+  const MKLDNNEngine& engine() const { return *engine_; }
+  /*! \brief  Submit primitive to pipeline. A copy of *p is appended; p is
+   * dereferenced without a null check, so it must not be null. */
+  void Submit(const MKLDNNPrimitivePtr& p) { pipeline_.push_back(*p); }
+  /*! \brief  Execute all submitted primitives in pipeline */
+  void Execute(bool block = true);
+ protected:
+  /*! \brief  Reset the stream to prepare next execute */
+  void ResetStream();
+ private:
+  // Element pools, each keyed by the op_key passed to AddElement/GetElement.
+  std::unordered_map<const std::string, const MKLDNNMemoryPtr,
+                     std::hash<std::string>>
+      memory_pool_;
+  std::unordered_map<const std::string, const MKLDNNPrimitivePtr,
+                     std::hash<std::string>>
+      primitive_pool_;
+  std::unordered_map<const std::string, const MKLDNNPrimitiveDescPtr,
+                     std::hash<std::string>>
+      primitive_desc_pool_;
+  // Primitives queued by Submit() and consumed by Execute().
+  std::vector<MKLDNNPrimitive> pipeline_;
+  MKLDNNStreamPtr stream_;
+  MKLDNNEnginePtr engine_;
+  // True while stream_ is fresh, i.e. not yet used by an Execute() call.
+  bool ready_;
+};
+#endif
 /*! \brief device context pool singleton */
 class DeviceContextPool {
 public:

--- a/paddle/platform/mkldnn_helper.h
+++ b/paddle/platform/mkldnn_helper.h
+/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#pragma once
+#include <mkldnn.hpp>
+namespace paddle {
+namespace platform {
+// Short names for the mkldnn C++ API types used by the device context.
+using MKLDNNStream = mkldnn::stream;
+using MKLDNNEngine = mkldnn::engine;
+using MKLDNNMemory = mkldnn::memory;
+using MKLDNNPrimitive = mkldnn::primitive;
+using MKLDNNPrimitiveDesc = mkldnn::handle<mkldnn_primitive_desc_t>;
+// Uniquely-owning pointers to the types above.
+using MKLDNNStreamPtr = std::unique_ptr<MKLDNNStream>;
+using MKLDNNEnginePtr = std::unique_ptr<MKLDNNEngine>;
+using MKLDNNMemoryPtr = std::unique_ptr<MKLDNNMemory>;
+using MKLDNNPrimitivePtr = std::unique_ptr<MKLDNNPrimitive>;
+using MKLDNNPrimitiveDescPtr = std::unique_ptr<MKLDNNPrimitiveDesc>;
+}  // namespace platform
+}  // namespace paddle