/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ /* NOTE(review): in this copy the #include targets and the contents of template angle brackets appear to be missing (e.g. "#include #include", "static_cast(const_cast(t))"); restore them from the upstream header before building. */ #pragma once #include #include #include #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/platform/place.h" namespace paddle { namespace platform { /* Short aliases for the mkldnn types used in this header. */ using MKLDNNStream = mkldnn::stream; using MKLDNNEngine = mkldnn::engine; using MKLDNNMemory = mkldnn::memory; using MKLDNNMemoryDescriptor = mkldnn::memory::desc; using MKLDNNPrimitive = mkldnn::primitive; using MKLDNNPrimitiveDesc = mkldnn::handle; /* Owning-pointer aliases. NOTE(review): the std::unique_ptr element types are not visible here; presumably each wraps the alias of the same name above - confirm. */ typedef std::unique_ptr MKLDNNStreamPtr; typedef std::unique_ptr MKLDNNEnginePtr; typedef std::unique_ptr MKLDNNMemoryPtr; typedef std::unique_ptr MKLDNNPrimitivePtr; typedef std::unique_ptr MKLDNNPrimitiveDescPtr; /* Removes constness from t with const_cast, then static_casts the result; returns void*. */ template void* to_void_cast(const Type* t) { return static_cast(const_cast(t)); } /* Same as to_void_cast but the outer cast is reinterpret_cast. */ template void* to_void_reinterpret_cast(const Type* t) { return reinterpret_cast(const_cast(t)); } /* tf_desc / tf_pd name a type's nested desc and primitive_desc types. */ template using tf_desc = typename Type::desc; template using tf_pd = typename Type::primitive_desc; /* Builds a tf_desc from mkldnn::prop_kind::forward plus args, allocates a tf_pd from that desc and engine e with new, and returns it owned by a std::shared_ptr. */ template std::shared_ptr> MKLDNNFwdPrimitiveDesc(const Engine& e, Args&&... args) { auto desc = tf_desc(mkldnn::prop_kind::forward, (args)...); auto pd = new tf_pd(desc, e); return std::shared_ptr>(pd); } /* Builds a tf_desc from args and returns, by value, a tf_pd constructed from (desc, e, p). */ template tf_pd MKLDNNBwdPrimitiveDesc(const Engine& e, const Primitive& p, Args&&... 
args) { auto desc = tf_desc(args...); return tf_pd(desc, e, p); } /* Copies dims into an mkldnn::memory::dims and writes a debug line to std::cout. NOTE(review): the text between the last "<" of that print and ("use_mkldnn") looks lost - the end of this function and the head of the function that returns use_mkldnn && is_cpu_place(ctx.GetPlace()) are not visible. */ inline mkldnn::memory::desc MKLDNNMemDesc(const std::vector& dims, mkldnn::memory::data_type data_type, mkldnn::memory::format format) { mkldnn::memory::dims tz = dims; std::cout<<"this is MKLDNNMemDesc"<<" data_type"<("use_mkldnn"); return use_mkldnn && platform::is_cpu_place(ctx.GetPlace()); } /* Primary template: returns mkldnn::memory::data_undef. */ template mkldnn::memory::data_type MKLDNNGetDataType() { return mkldnn::memory::data_undef; } /* Explicit specialization returning mkldnn::memory::f32. NOTE(review): the specialized type is not visible here; presumably float - confirm. */ template <> inline mkldnn::memory::data_type MKLDNNGetDataType() { return mkldnn::memory::f32; } /* Constructs an mkldnn::reorder from src to dst, submits it on an eager mkldnn::stream and waits for it. */ inline void Reorder(const mkldnn::memory& src, const mkldnn::memory& dst) { auto reorder_prim = mkldnn::reorder(src, dst); std::vector pipeline; pipeline.push_back(reorder_prim); mkldnn::stream(mkldnn::stream::kind::eager).submit(pipeline).wait(); } /* Returns data.format of the memory's primitive descriptor, converted with static_cast (return type mkldnn::memory::format). Note the parameter is taken by value. */ inline mkldnn::memory::format GetMKLDNNFormat(const mkldnn::memory memory) { return static_cast( memory.get_primitive_desc().desc().data.format); } /* Overload reading data.format from the dst_primitive_desc() of a sum primitive descriptor. */ inline mkldnn::memory::format GetMKLDNNFormat( const mkldnn::sum::primitive_desc& memory) { return static_cast( memory.dst_primitive_desc().desc().data.format); } /* Holds a device context, an engine and a base key; the methods below look up and store objects in the device context under key_ plus a suffix. The class body continues beyond what is visible in this copy. */ class MKLDNNHandler { public: /* Stores the arguments; is_reusing_ starts as false. */ MKLDNNHandler(const MKLDNNDeviceContext& dev_ctx, mkldnn::engine engine, const std::string& base_key) : dev_ctx_(dev_ctx), engine_(engine), key_(base_key), is_reusing_(false) {} /* The Acquire*Memory wrappers below each forward md and ptr to AcquireMemory with a fixed key suffix. */ std::shared_ptr AcquireSrcMemory( const mkldnn::memory::desc& md, void* ptr) { return this->AcquireMemory(md, ptr, "@user_src_mem_p"); } std::shared_ptr AcquireWeightsMemory( const mkldnn::memory::desc& md, void* ptr) { return this->AcquireMemory(md, ptr, "@user_weights_mem_p"); } std::shared_ptr AcquireBiasMemory( const mkldnn::memory::desc& md, void* ptr) { return this->AcquireMemory(md, ptr, "@user_bias_mem_p"); } std::shared_ptr AcquireDstMemory( const mkldnn::memory::desc& md, void* ptr) { return this->AcquireMemory(md, ptr, "@user_dst_mem_p"); } std::shared_ptr AcquireDiffDstMemory( const mkldnn::memory::desc& md, void* ptr) { return this->AcquireMemory(md, ptr, 
"@user_diff_dst_mem_p"); } std::shared_ptr AcquireDiffSrcMemory( const mkldnn::memory::desc& md, void* ptr) { return this->AcquireMemory(md, ptr, "@user_diff_src_mem_p"); } /* Looks up key_ + suffix in dev_ctx_; PADDLE_ENFORCE fails if nothing is found while is_reusing_ is true. NOTE(review): on this collapsed line the "//" that follows turns the remainder of the line into a comment, and text after several "<" characters appears lost - restore line breaks and template arguments from the upstream header. The std::cout statements look like leftover debug output - confirm before keeping them. */ std::shared_ptr AcquireMemoryFromPrimitive( mkldnn::memory::primitive_desc mdp, void* ptr, const std::string& suffix) { auto local_key = key_ + suffix; auto mem_p = std::static_pointer_cast(dev_ctx_.GetBlob(local_key)); PADDLE_ENFORCE((mem_p != nullptr) || (is_reusing_ == false), "Fail to find mem primitive in device context"); //mem_p = nullptr; if (mem_p == nullptr) { mem_p = std::make_shared(mdp, ptr); std::cout<<"mem_p == null"<( mkldnn::memory::primitive_desc{md, engine_}, ptr); dev_ctx_.SetBlob(local_key, mem_p); } else { mem_p->set_data_handle(ptr); // Mark that reusing happenned. All primitives from operator instance // should be reused or none of them. So we check consistency std::cout<<"2 is reuse = "<(dev_ctx_.GetBlob(local_key)); PADDLE_ENFORCE((target_memory_p != nullptr) || (is_reusing_ == false), "Fail to find mem primitive in device context"); if (target_memory_p == nullptr) { target_memory_p = user_memory_p; std::shared_ptr reorder_p; if (mpd != user_mpd) { target_memory_p = std::make_shared(mpd); auto reorder_p = std::make_shared(*user_memory_p, *target_memory_p); /* NOTE(review): the reorder_p declared inside the is_INT8 block is a separate local that ends at its closing brace, so SetBlob and push_back below use the reorder_p created just above without attri; the outer "std::shared_ptr reorder_p" is never assigned. Confirm whether the INT8 reorder was meant to replace it. */ if(is_INT8){ mkldnn::primitive_attr attri; attri.set_output_scales(mask, scale_data); auto reorder_pd = std::shared_ptr( new mkldnn::reorder::primitive_desc(mpd, user_mpd, attri)); auto reorder_p = std::shared_ptr(new mkldnn::reorder(*reorder_pd, *user_memory_p, *target_memory_p)); } dev_ctx_.SetBlob(key_reorder_p, reorder_p); pipeline.push_back(*reorder_p); } dev_ctx_.SetBlob(local_key, target_memory_p); } else if (!is_persistent) { // Make reorder if needed auto reorder_p = std::static_pointer_cast( dev_ctx_.GetBlob(key_reorder_p)); if (reorder_p != nullptr) { pipeline.push_back(*reorder_p); } std::cout<<"3 is reuse = "<