/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #pragma once #include #include #include #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/platform/place.h" namespace paddle { namespace platform { using MKLDNNStream = mkldnn::stream; using MKLDNNEngine = mkldnn::engine; using MKLDNNMemory = mkldnn::memory; using MKLDNNMemoryDescriptor = mkldnn::memory::desc; using MKLDNNPrimitive = mkldnn::primitive; using MKLDNNPrimitiveDesc = mkldnn::handle; typedef std::unique_ptr MKLDNNStreamPtr; typedef std::unique_ptr MKLDNNEnginePtr; typedef std::unique_ptr MKLDNNMemoryPtr; typedef std::unique_ptr MKLDNNPrimitivePtr; typedef std::unique_ptr MKLDNNPrimitiveDescPtr; template void* to_void_cast(const Type* t) { return static_cast(const_cast(t)); } template void* to_void_reinterpret_cast(const Type* t) { return reinterpret_cast(const_cast(t)); } template using tf_desc = typename Type::desc; template using tf_pd = typename Type::primitive_desc; template std::shared_ptr> MKLDNNFwdPrimitiveDesc(const Engine& e, Args&&... args) { auto desc = tf_desc(mkldnn::prop_kind::forward, (args)...); auto pd = new tf_pd(desc, e); return std::shared_ptr>(pd); } template tf_pd MKLDNNBwdPrimitiveDesc(const Engine& e, const Primitive& p, Args&&... args) { auto desc = tf_desc(args...); return tf_pd(desc, e, p); } inline mkldnn::memory::desc MKLDNNMemDesc(const std::vector& dims, mkldnn::memory::data_type data_type, mkldnn::memory::format format) { mkldnn::memory::dims tz = dims; return mkldnn::memory::desc({tz}, data_type, format); } inline bool CanMKLDNNBeUsed(const framework::ExecutionContext& ctx) { bool use_mkldnn = ctx.Attr("use_mkldnn"); return use_mkldnn && platform::is_cpu_place(ctx.GetPlace()); } template mkldnn::memory::data_type MKLDNNGetDataType() { return mkldnn::memory::data_undef; } template <> inline mkldnn::memory::data_type MKLDNNGetDataType() { return mkldnn::memory::f32; } inline void Reorder(const mkldnn::memory& src, const mkldnn::memory& dst) { auto reorder_prim = mkldnn::reorder(src, dst); std::vector pipeline; pipeline.push_back(reorder_prim); mkldnn::stream(mkldnn::stream::kind::eager).submit(pipeline).wait(); } inline mkldnn::memory::format GetMKLDNNFormat(const mkldnn::memory memory) { return static_cast( memory.get_primitive_desc().desc().data.format); } inline mkldnn::memory::format GetMKLDNNFormat( const mkldnn::sum::primitive_desc& memory) { return static_cast( memory.dst_primitive_desc().desc().data.format); } class MKLDNNHandler { public: MKLDNNHandler(const MKLDNNDeviceContext& dev_ctx, mkldnn::engine engine, const std::string& base_key) : dev_ctx_(dev_ctx), engine_(engine), key_(base_key), is_reusing_(false) {} std::shared_ptr AcquireSrcMemory( const mkldnn::memory::desc& md, void* ptr) { return this->AcquireMemory(md, ptr, "@user_src_mem_p"); } std::shared_ptr AcquireWeightsMemory( const mkldnn::memory::desc& md, void* ptr) { return this->AcquireMemory(md, ptr, "@user_weights_mem_p"); } std::shared_ptr AcquireBiasMemory( const mkldnn::memory::desc& md, void* ptr) { return this->AcquireMemory(md, ptr, "@user_bias_mem_p"); } std::shared_ptr AcquireDstMemory( const mkldnn::memory::desc& md, void* ptr) { return this->AcquireMemory(md, ptr, "@user_dst_mem_p"); } std::shared_ptr AcquireDiffDstMemory( const mkldnn::memory::desc& md, void* ptr) { return this->AcquireMemory(md, ptr, "@user_diff_dst_mem_p"); } std::shared_ptr AcquireDiffSrcMemory( const mkldnn::memory::desc& md, void* ptr) { return this->AcquireMemory(md, ptr, "@user_diff_src_mem_p"); } std::shared_ptr AcquireMemoryFromPrimitive( mkldnn::memory::primitive_desc mdp, void* ptr, const std::string& suffix) { auto local_key = key_ + suffix; auto mem_p = std::static_pointer_cast(dev_ctx_.GetBlob(local_key)); PADDLE_ENFORCE((mem_p != nullptr) || (is_reusing_ == false), "Fail to find mem primitive in device context"); if (mem_p == nullptr) { mem_p = std::make_shared(mdp, ptr); dev_ctx_.SetBlob(local_key, mem_p); } else { mem_p->set_data_handle(ptr); // Mark that reusing happenned. All primitives from operator instance // should be reused or none of them. So we check consistency is_reusing_ = true; } return mem_p; } std::shared_ptr AcquireMemory(const mkldnn::memory::desc& md, void* ptr, const std::string& suffix) { /*Generate key*/ auto local_key = key_ + suffix; auto mem_p = std::static_pointer_cast(dev_ctx_.GetBlob(local_key)); PADDLE_ENFORCE((mem_p != nullptr) || (is_reusing_ == false), "Fail to find mem primitive in device context"); if (mem_p == nullptr) { mem_p = std::make_shared( mkldnn::memory::primitive_desc{md, engine_}, ptr); dev_ctx_.SetBlob(local_key, mem_p); } else { mem_p->set_data_handle(ptr); // Mark that reusing happenned. All primitives from operator instance // should be reused or none of them. So we check consistency is_reusing_ = true; } return mem_p; } std::shared_ptr AcquireMemory( mkldnn::memory::primitive_desc& mpd, // NOLINT mkldnn::memory::primitive_desc& user_mpd, // NOLINT const std::shared_ptr user_memory_p, const std::string& suffix, const std::vector& pipeline, bool is_test = false) { // NOLINT // create reorder primitive if the input format is not the preferred one auto local_key = key_ + suffix; auto key_reorder_p = key_ + suffix + "reorder_p"; auto target_memory_p = std::static_pointer_cast(dev_ctx_.GetBlob(local_key)); PADDLE_ENFORCE((target_memory_p != nullptr) || (is_reusing_ == false), "Fail to find mem primitive in device context"); if (target_memory_p == nullptr) { target_memory_p = user_memory_p; std::shared_ptr reorder_p; if (mpd != user_mpd) { target_memory_p = std::make_shared(mpd); auto reorder_p = std::make_shared(*user_memory_p, *target_memory_p); dev_ctx_.SetBlob(key_reorder_p, reorder_p); pipeline.push_back(*reorder_p); } dev_ctx_.SetBlob(local_key, target_memory_p); } else if (!is_test) { // Make reorder if needed auto reorder_p = std::static_pointer_cast( dev_ctx_.GetBlob(key_reorder_p)); if (reorder_p != nullptr) { pipeline.push_back(*reorder_p); } is_reusing_ = true; } return target_memory_p; } static std::string GetHash(mkldnn::memory::dims& operand_dims, // NOLINT const std::string& suffix) { return dims2str(operand_dims) + suffix; } protected: static std::string dims2str(const mkldnn::memory::dims& operand_dims) { std::string dstr = ""; for (size_t i = 0; i < operand_dims.size(); ++i) { dstr += std::to_string(operand_dims[i]) + "-"; } return dstr; } protected: const MKLDNNDeviceContext& dev_ctx_; mkldnn::engine engine_; std::string key_; bool is_reusing_; }; inline mkldnn::memory::format MKLDNNFormatForSize( size_t dims_size, mkldnn::memory::format data_format) { if (dims_size == 1) { return mkldnn::memory::format::x; } else if (dims_size == 2) { return mkldnn::memory::format::nc; } return data_format; } inline mkldnn::memory::format data_format_to_memory_format( const std::string& data_format) { switch (framework::StringToDataLayout(data_format)) { case framework::DataLayout::kNHWC: return mkldnn::memory::format::nhwc; case framework::DataLayout::kNCHW: return mkldnn::memory::format::nchw; default: return mkldnn::memory::format::any; } } } // namespace platform } // namespace paddle