diff --git a/cmake/external/mkldnn.cmake b/cmake/external/mkldnn.cmake index 5d24caebdcc5a28823164d718fb1628be5c4179d..b67c559fdf7a2b1695589a041cfa8fb8a9580516 100644 --- a/cmake/external/mkldnn.cmake +++ b/cmake/external/mkldnn.cmake @@ -63,9 +63,17 @@ ExternalProject_Add( -DMKLROOT:PATH=${MKLML_ROOT} ) -ADD_LIBRARY(mkldnn SHARED IMPORTED GLOBAL) -SET_PROPERTY(TARGET mkldnn PROPERTY IMPORTED_LOCATION ${MKLDNN_LIB}) -ADD_DEPENDENCIES(mkldnn ${MKLDNN_PROJECT}) +ADD_LIBRARY(shared_mkldnn SHARED IMPORTED GLOBAL) +SET_PROPERTY(TARGET shared_mkldnn PROPERTY IMPORTED_LOCATION ${MKLDNN_LIB}) +ADD_DEPENDENCIES(shared_mkldnn ${MKLDNN_PROJECT}) MESSAGE(STATUS "MKLDNN library: ${MKLDNN_LIB}") add_definitions(-DPADDLE_WITH_MKLDNN) -LIST(APPEND external_project_dependencies mkldnn) +LIST(APPEND external_project_dependencies shared_mkldnn) + +# generate a static dummy target to track mkldnn dependencies +# for cc_library(xxx SRCS xxx.c DEPS mkldnn) +SET(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/mkldnn_dummy.c) +FILE(WRITE ${dummyfile} "const char * dummy = \"${dummyfile}\";") +ADD_LIBRARY(mkldnn STATIC ${dummyfile}) +TARGET_LINK_LIBRARIES(mkldnn ${MKLDNN_LIB} ${MKLML_LIB} ${MKLML_IOMP_LIB}) +ADD_DEPENDENCIES(mkldnn ${MKLDNN_PROJECT}) diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 528e45b51099d97a1f6f0dfc971b6231f928af94..3967a40136d6493d533ef9aadd2054cc23592879 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -41,7 +41,7 @@ device_context) cc_library(op_proto_maker SRCS op_proto_maker.cc DEPS framework_proto attribute) cc_test(op_proto_maker_test SRCS op_proto_maker_test.cc DEPS op_proto_maker) cc_library(op_info SRCS op_info.cc DEPS attribute framework_proto) -cc_library(shape_inference SRCS shape_inference.cc DEPS ddim attribute) +cc_library(shape_inference SRCS shape_inference.cc DEPS ddim attribute device_context) cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog shape_inference data_transform) cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry init) diff --git a/paddle/operators/tensor.save b/paddle/operators/tensor.save deleted file mode 100644 index c24308a7d0131b84c28c0a9857cce4949afb2091..0000000000000000000000000000000000000000 Binary files a/paddle/operators/tensor.save and /dev/null differ diff --git a/paddle/platform/CMakeLists.txt b/paddle/platform/CMakeLists.txt index 8c4803b9739bb54cae89de62468a47631a5dde94..44f6d85cd1510f309595ca711de2e0f767219580 100644 --- a/paddle/platform/CMakeLists.txt +++ b/paddle/platform/CMakeLists.txt @@ -21,10 +21,16 @@ ELSE() set(GPU_CTX_DEPS) ENDIF() +IF(WITH_MKLDNN) + set(MKLDNN_CTX_DEPS mkldnn) +ELSE() + set(MKLDNN_CTX_DEPS) +ENDIF() + # memcpy deoends on device_context, here add deps individually for # avoiding cycle dependencies cc_library(device_context SRCS device_context.cc DEPS memory buddy_allocator - system_allocator memory_block meta_data meta_cache place eigen3 ${GPU_CTX_DEPS}) + system_allocator memory_block meta_data meta_cache place eigen3 ${GPU_CTX_DEPS} ${MKLDNN_CTX_DEPS}) nv_test(device_context_test SRCS device_context_test.cu DEPS device_context gpu_info) nv_test(cudnn_helper_test SRCS cudnn_helper_test.cc DEPS dynload_cuda) diff --git a/paddle/platform/device_context.cc b/paddle/platform/device_context.cc index 4bf643e048dcf97f7888dcc78eb1b4fa4feee046..9d9348079a0179418ab1a1474cfd8b69136f26b2 100644 --- a/paddle/platform/device_context.cc +++ b/paddle/platform/device_context.cc @@ -168,5 +168,69 @@ cudaStream_t CUDADeviceContext::stream() const { return stream_; } #endif +#ifdef PADDLE_WITH_MKLDNN +MKLDNNDeviceContext::MKLDNNDeviceContext(CPUPlace place) + : CPUDeviceContext(place), ready_(false) { + stream_.reset(new mkldnn::stream(mkldnn::stream::kind::eager)); + engine_.reset(new mkldnn::engine(mkldnn::engine::cpu, 0)); +} + +template +void MKLDNNDeviceContext::AddElement(const std::string& op_key, + const T& value) { + if (GetElement(op_key)) { + return; + } + GetElementPool().emplace(op_key, std::move(value)); +} + +template +const T& MKLDNNDeviceContext::GetElement(const std::string& op_key) const { + auto it = GetElementPool().find(op_key); + return it == GetElementPool().end() ? nullptr : it->second; +} + +template <> +const std::unordered_map>& +MKLDNNDeviceContext::GetElementPool() const { + return memory_pool_; +} + +template <> +const std::unordered_map>& +MKLDNNDeviceContext::GetElementPool() const { + return primitive_pool_; +} + +template <> +const std::unordered_map>& +MKLDNNDeviceContext::GetElementPool() const { + return primitive_desc_pool_; +} + +void MKLDNNDeviceContext::Execute(bool block) { + if (pipeline_.empty()) { + return; + } + ResetStream(); + stream_->submit(pipeline_).wait(block); + ready_ = false; + pipeline_.clear(); +} + +void MKLDNNDeviceContext::ResetStream() { + if (ready_) { + return; + } + // TODO(TJ): change me when mkldnn have specific method to reset this state + stream_.reset(new mkldnn::stream(mkldnn::stream::kind::eager)); + ready_ = true; +} + +#endif + } // namespace platform } // namespace paddle diff --git a/paddle/platform/device_context.h b/paddle/platform/device_context.h index 609ea4bd3ad50e2eefa908539339903ed1f0a807..7a0040c9c229af79ea8be1049dfd6c0d1b4d19cf 100644 --- a/paddle/platform/device_context.h +++ b/paddle/platform/device_context.h @@ -21,6 +21,10 @@ limitations under the License. */ #define EIGEN_USE_GPU #endif +#ifdef PADDLE_WITH_MKLDNN +#include "paddle/platform/mkldnn_helper.h" +#endif + #include "paddle/platform/enforce.h" #include "paddle/platform/place.h" #include "unsupported/Eigen/CXX11/Tensor" @@ -105,6 +109,54 @@ struct DefaultDeviceContextType { #endif +#ifdef PADDLE_WITH_MKLDNN +class MKLDNNDeviceContext : public CPUDeviceContext { + public: + explicit MKLDNNDeviceContext(CPUPlace place); + + /* \brief Add new element: memory, primitive or primitive desc */ + template + void AddElement(const std::string& op_key, const T& value); + + /* \brief Get existed element: memory, primitive or primitive desc */ + template + const T& GetElement(const std::string& op_key) const; + + /* \brief Get element pool: memory, primitive or primitive desc pool */ + template + const std::unordered_map>& + GetElementPool() const; + + /* \brief Get the active engine */ + const MKLDNNEngine& engine() const { return *engine_; } + + /* \brief Submit primitive to pipeline */ + void Submit(const MKLDNNPrimitivePtr& p) { pipeline_.push_back(*p); } + + /*! \brief Execute all submitted primitives in pipeline */ + void Execute(bool block = true); + + protected: + /*! \brief Reset the stream to prepare next exectue */ + void ResetStream(); + + private: + std::unordered_map> + memory_pool_; + std::unordered_map> + primitive_pool_; + std::unordered_map> + primitive_desc_pool_; + std::vector pipeline_; + MKLDNNStreamPtr stream_; + MKLDNNEnginePtr engine_; + bool ready_; +}; +#endif + /*! \brief device context pool singleton */ class DeviceContextPool { public: diff --git a/paddle/platform/mkldnn_helper.h b/paddle/platform/mkldnn_helper.h new file mode 100644 index 0000000000000000000000000000000000000000..cd52a8b4c434071a030a8e7a8a70fc3adba8460c --- /dev/null +++ b/paddle/platform/mkldnn_helper.h @@ -0,0 +1,35 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include + +namespace paddle { +namespace platform { + +using MKLDNNStream = mkldnn::stream; +using MKLDNNEngine = mkldnn::engine; +using MKLDNNMemory = mkldnn::memory; +using MKLDNNPrimitive = mkldnn::primitive; +using MKLDNNPrimitiveDesc = mkldnn::handle; + +typedef std::unique_ptr MKLDNNStreamPtr; +typedef std::unique_ptr MKLDNNEnginePtr; +typedef std::unique_ptr MKLDNNMemoryPtr; +typedef std::unique_ptr MKLDNNPrimitivePtr; +typedef std::unique_ptr MKLDNNPrimitiveDescPtr; + +} // namespace platform +} // namespace paddle