diff --git a/paddle/fluid/framework/data_layout_transform.cc b/paddle/fluid/framework/data_layout_transform.cc index a42b686548c71b2fd372040d3d65345a9b1ca802..1701359b045999a90ffb96a3106a86025dba828c 100644 --- a/paddle/fluid/framework/data_layout_transform.cc +++ b/paddle/fluid/framework/data_layout_transform.cc @@ -100,21 +100,21 @@ void TransDataLayout(const OpKernelType& kernel_type_for_var, } #ifdef PADDLE_WITH_MKLDNN -using mkldnn::memory; -using mkldnn::primitive; -using mkldnn::reorder; +using dnnl::memory; +using dnnl::primitive; +using dnnl::reorder; -void* GetDataFromTensor(const Tensor& tensor, mkldnn::memory::data_type type) { +void* GetDataFromTensor(const Tensor& tensor, dnnl::memory::data_type type) { switch (type) { - case mkldnn::memory::data_type::f32: + case dnnl::memory::data_type::f32: return platform::to_void_cast(tensor.data()); - case mkldnn::memory::data_type::s8: + case dnnl::memory::data_type::s8: return platform::to_void_cast(tensor.data()); - case mkldnn::memory::data_type::u8: + case dnnl::memory::data_type::u8: return platform::to_void_cast(tensor.data()); - case mkldnn::memory::data_type::s32: + case dnnl::memory::data_type::s32: return platform::to_void_cast(tensor.data()); - case mkldnn::memory::data_type::bf16: + case dnnl::memory::data_type::bf16: return platform::to_void_cast(tensor.data()); default: PADDLE_THROW( diff --git a/paddle/fluid/framework/data_layout_transform.h b/paddle/fluid/framework/data_layout_transform.h index 3404ba2db67e5f0e90203d7ee0bb238bb377af0f..3c58a2d09f2599072c550244b287b3fb64450fef 100644 --- a/paddle/fluid/framework/data_layout_transform.h +++ b/paddle/fluid/framework/data_layout_transform.h @@ -37,7 +37,7 @@ namespace paddle { namespace framework { #ifdef PADDLE_WITH_MKLDNN -using MKLDNNDataType = mkldnn::memory::data_type; +using MKLDNNDataType = dnnl::memory::data_type; inline MKLDNNMemoryFormat ToMKLDNNFormat(const DataLayout& layout) { switch (layout) { diff --git a/paddle/fluid/framework/data_layout_transform_test.cc b/paddle/fluid/framework/data_layout_transform_test.cc index 20443e9a3dcad62770e62d9a780960bf3db43111..d910543ed7ea439082f83776b82969e38c9ac975 100644 --- a/paddle/fluid/framework/data_layout_transform_test.cc +++ b/paddle/fluid/framework/data_layout_transform_test.cc @@ -44,7 +44,7 @@ TEST(DataTransform, DataLayoutFunction) { } #ifdef PADDLE_WITH_MKLDNN -TEST(DataTransform, GetDataFromTensorDNNL) { +TEST(DataTransformBf16, GetDataFromTensorDNNL) { auto place = paddle::platform::CPUPlace(); paddle::framework::Tensor in = paddle::framework::Tensor(); in.mutable_data( @@ -55,4 +55,14 @@ TEST(DataTransform, GetDataFromTensorDNNL) { EXPECT_EQ(in_data, paddle::platform::to_void_cast( in.data())); } + +TEST(DataTransformInt32, GetDataFromTensorDNNL) { + auto place = paddle::platform::CPUPlace(); + paddle::framework::Tensor in = paddle::framework::Tensor(); + in.mutable_data(paddle::framework::make_ddim({2, 3, 1, 2}), place); + + void* in_data = + paddle::framework::GetDataFromTensor(in, dnnl::memory::data_type::s32); + EXPECT_EQ(in_data, paddle::platform::to_void_cast(in.data())); +} #endif diff --git a/paddle/fluid/framework/data_type_transform_test.cc b/paddle/fluid/framework/data_type_transform_test.cc index ea7a665bcbe02ff382f1b3bf04ce177a674483c9..32a15709106a64cfad9d56d165adb76f268059b1 100644 --- a/paddle/fluid/framework/data_type_transform_test.cc +++ b/paddle/fluid/framework/data_type_transform_test.cc @@ -310,4 +310,117 @@ TEST(DataTypeTransform, CPUTransform) { static_cast(in_data_bool[i]).x); } } + + // data type transform from/to int32 + { + paddle::framework::Tensor in; + paddle::framework::Tensor out; + + int32_t* ptr = + in.mutable_data(paddle::framework::make_ddim({2, 3}), place); + int data_number = 2 * 3; + + for (int i = 0; i < data_number; ++i) { + ptr[i] = i; + } + + // transform from int32 to other data types + paddle::framework::TransDataType(kernel_int32, kernel_fp32, in, &out); + float* out_data_float = out.data(); + for (int i = 0; i < data_number; ++i) { + EXPECT_EQ(out_data_float[i], static_cast(ptr[i])); + } + + paddle::framework::TransDataType(kernel_int32, kernel_fp64, in, &out); + double* out_data_double = out.data(); + for (int i = 0; i < data_number; ++i) { + EXPECT_EQ(out_data_double[i], static_cast(ptr[i])); + } + + paddle::framework::TransDataType(kernel_int32, kernel_bf16, in, &out); + paddle::platform::bfloat16* out_data_bf16 = + out.data(); + for (int i = 0; i < data_number; ++i) { + EXPECT_EQ(out_data_bf16[i], + static_cast(ptr[i])); + } + + paddle::framework::TransDataType(kernel_int32, kernel_int64, in, &out); + int64_t* out_data_int64 = out.data(); + for (int i = 0; i < data_number; ++i) { + EXPECT_EQ(out_data_int64[i], static_cast(ptr[i])); + } + + paddle::framework::TransDataType(kernel_int32, kernel_bool, in, &out); + bool* out_data_bool = out.data(); + for (int i = 0; i < data_number; ++i) { + EXPECT_EQ(out_data_bool[i], static_cast(ptr[i])); + } + + // transform float to int32 + float* in_data_float = + in.mutable_data(paddle::framework::make_ddim({2, 3}), place); + for (int i = 0; i < data_number; ++i) { + in_data_float[i] = i; + } + + paddle::framework::TransDataType(kernel_fp32, kernel_int32, in, &out); + ptr = out.data(); + for (int i = 0; i < data_number; ++i) { + EXPECT_EQ(ptr[i], static_cast(in_data_float[i])); + } + + // transform double to int32 + double* in_data_double = + in.mutable_data(paddle::framework::make_ddim({2, 3}), place); + for (int i = 0; i < data_number; ++i) { + in_data_double[i] = i; + } + + paddle::framework::TransDataType(kernel_fp64, kernel_int32, in, &out); + ptr = out.data(); + for (int i = 0; i < data_number; ++i) { + EXPECT_EQ(ptr[i], static_cast(in_data_double[i])); + } + + // transform bfloat16 to int32 + paddle::platform::bfloat16* in_data_bf16 = + in.mutable_data( + paddle::framework::make_ddim({2, 3}), place); + for (int i = 0; i < data_number; ++i) { + in_data_bf16[i] = i; + } + + paddle::framework::TransDataType(kernel_bf16, kernel_int32, in, &out); + ptr = out.data(); + for (int i = 0; i < data_number; ++i) { + EXPECT_EQ(ptr[i], static_cast(in_data_bf16[i])); + } + + // transform int64 to int32 + int64_t* in_data_int64 = + in.mutable_data(paddle::framework::make_ddim({2, 3}), place); + for (int i = 0; i < data_number; ++i) { + in_data_int64[i] = i; + } + + paddle::framework::TransDataType(kernel_int64, kernel_int32, in, &out); + ptr = out.data(); + for (int i = 0; i < data_number; ++i) { + EXPECT_EQ(ptr[i], static_cast(in_data_int64[i])); + } + + // transform bool to int32 + bool* in_data_bool = + in.mutable_data(paddle::framework::make_ddim({2, 3}), place); + for (int i = 0; i < data_number; ++i) { + in_data_bool[i] = i; + } + + paddle::framework::TransDataType(kernel_bool, kernel_int32, in, &out); + ptr = out.data(); + for (int i = 0; i < data_number; ++i) { + EXPECT_EQ(ptr[i], static_cast(in_data_bool[i])); + } + } } diff --git a/paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h b/paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h index ffcdc079985fa66793599c0b7f1b3a71f400b393..131ea3901da8ac4db243a5abee1c09dfad6917e1 100644 --- a/paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h +++ b/paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h @@ -26,9 +26,9 @@ namespace operators { using framework::DataLayout; using framework::Tensor; -using mkldnn::memory; -using mkldnn::primitive; -using mkldnn::stream; +using dnnl::memory; +using dnnl::primitive; +using dnnl::stream; template class EltwiseMKLDNNKernel : public framework::OpKernel { diff --git a/paddle/fluid/operators/fused/mkldnn/fusion_lstm_mkldnn_op.cc b/paddle/fluid/operators/fused/mkldnn/fusion_lstm_mkldnn_op.cc index edf541fde2a518510bcd527f3242d1c58d1267b8..385e4ad8808a51a207ef8779c4544da60f0a6a3d 100644 --- a/paddle/fluid/operators/fused/mkldnn/fusion_lstm_mkldnn_op.cc +++ b/paddle/fluid/operators/fused/mkldnn/fusion_lstm_mkldnn_op.cc @@ -31,12 +31,11 @@ class LSTMMKLDNNHandler public: LSTMMKLDNNHandler(const paddle::framework::ExecutionContext& ctx, const platform::MKLDNNDeviceContext& dev_ctx, - const mkldnn::engine mkldnn_engine, - platform::Place cpu_place, const LoDTensor* input, - const Tensor* weight_h, const Tensor* h0, const Tensor* c0, - const bool is_reverse, const int64_t N, const int64_t Ti, - const int64_t IC, const int64_t OC, - const std::string& unique_name) + const dnnl::engine mkldnn_engine, platform::Place cpu_place, + const LoDTensor* input, const Tensor* weight_h, + const Tensor* h0, const Tensor* c0, const bool is_reverse, + const int64_t N, const int64_t Ti, const int64_t IC, + const int64_t OC, const std::string& unique_name) : RNNMKLDNNHandler( ctx, dev_ctx, mkldnn_engine, ctx.GetPlace(), input, weight_h, h0, is_reverse, N, Ti, IC, OC, 4, diff --git a/paddle/fluid/operators/fused/mkldnn/fusion_rnn_mkldnn.h b/paddle/fluid/operators/fused/mkldnn/fusion_rnn_mkldnn.h index 5ef84eac4e672dddad0366ed2c2af4ab53252643..d89205b43d286fb955e20beadc6fdd2505c4398f 100644 --- a/paddle/fluid/operators/fused/mkldnn/fusion_rnn_mkldnn.h +++ b/paddle/fluid/operators/fused/mkldnn/fusion_rnn_mkldnn.h @@ -30,12 +30,11 @@ class RNNMKLDNNHandler : public platform::MKLDNNHandlerT { public: RNNMKLDNNHandler(const paddle::framework::ExecutionContext& ctx, const platform::MKLDNNDeviceContext& dev_ctx, - const mkldnn::engine mkldnn_engine, - platform::Place cpu_place, const LoDTensor* input, - const Tensor* weight_h, const Tensor* h0, - const bool is_reverse, const int64_t N, const int64_t Ti, - const int64_t IC, const int64_t OC, const int64_t G, - const std::string& unique_name) + const dnnl::engine mkldnn_engine, platform::Place cpu_place, + const LoDTensor* input, const Tensor* weight_h, + const Tensor* h0, const bool is_reverse, const int64_t N, + const int64_t Ti, const int64_t IC, const int64_t OC, + const int64_t G, const std::string& unique_name) : platform::MKLDNNHandlerT( dev_ctx, dev_ctx.GetEngine(), cpu_place, CreateKey(dev_ctx, unique_name, MKLDNNGetDataType(), Ti)), diff --git a/paddle/fluid/operators/mkldnn/axpy_handler.cc b/paddle/fluid/operators/mkldnn/axpy_handler.cc index db1127b055c31e6baf69b263b79f9da0e7bf71fd..ee630fe186a24b828f153c4938d7fd1b6b3fbed3 100644 --- a/paddle/fluid/operators/mkldnn/axpy_handler.cc +++ b/paddle/fluid/operators/mkldnn/axpy_handler.cc @@ -17,7 +17,7 @@ limitations under the License. */ #include #include -#include "mkldnn.hpp" +#include "dnnl.hpp" #include "paddle/fluid/operators/mkldnn/axpy_handler.h" #include "paddle/fluid/platform/bfloat16.h" #include "paddle/fluid/platform/device_context.h" diff --git a/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc index 0c8ea84296ec085abde9413329cc643df99f7aa8..d4d78bdfb661335edc2d1293f1bd9240cf0bc2b2 100644 --- a/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "mkldnn.hpp" +#include "dnnl.hpp" #include "paddle/fluid/framework/data_layout_transform.h" #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/operators/dequantize_op.h" @@ -23,13 +23,13 @@ limitations under the License. */ namespace paddle { namespace operators { -using mkldnn::memory; -using mkldnn::primitive; -using mkldnn::reorder; +using dnnl::memory; +using dnnl::primitive; +using dnnl::reorder; using platform::to_void_cast; using Tensor = framework::Tensor; using framework::DataLayout; -using mkldnn::stream; +using dnnl::stream; using platform::GetMKLDNNFormat; template @@ -64,7 +64,7 @@ class DeQuantOpKernel : public framework::OpKernel { auto src_tz = paddle::framework::vectorize(input->dims()); auto dst_tz = paddle::framework::vectorize(output->dims()); - mkldnn::memory::data_type src_dt = + dnnl::memory::data_type src_dt = paddle::framework::ToMKLDNNDataType(input->type()); MKLDNNMemoryFormat src_fmt = input->format(); @@ -76,34 +76,34 @@ class DeQuantOpKernel : public framework::OpKernel { const std::string key_src_mem = key + "@s"; const std::string key_dst_mem = key + "@d"; - std::shared_ptr src_memory; - std::shared_ptr dst_memory; + std::shared_ptr src_memory; + std::shared_ptr dst_memory; std::shared_ptr reorder_p; reorder_p = std::static_pointer_cast(dev_ctx.GetBlob(key_prim)); if (reorder_p == nullptr) { - mkldnn::primitive_attr attri; + dnnl::primitive_attr attri; int mask = 0; float reorder_scale = 1. / scale_data; attri.set_output_scales(mask, {reorder_scale}); if (with_shift) { - mkldnn::post_ops post_operations; + dnnl::post_ops post_operations; post_operations.append_sum(); attri.set_post_ops(post_operations); std::fill(output_data, output_data + output->numel(), reorder_shift); } auto src_md = platform::MKLDNNMemDesc({src_tz}, src_dt, src_fmt); - src_memory = std::make_shared( - src_md, engine, to_void_cast(input_data)); + src_memory = std::make_shared(src_md, engine, + to_void_cast(input_data)); auto dst_md = platform::MKLDNNMemDesc({dst_tz}, memory::data_type::f32, platform::MKLDNNFormatForSize( dst_tz.size(), MKLDNNMemoryFormat::nchw)); - dst_memory = std::make_shared( + dst_memory = std::make_shared( dst_md, engine, to_void_cast(output_data)); auto reorder_pd = std::shared_ptr( @@ -113,12 +113,12 @@ class DeQuantOpKernel : public framework::OpKernel { dev_ctx.SetBlob(key_src_mem, src_memory); dev_ctx.SetBlob(key_dst_mem, dst_memory); } else { - src_memory = std::static_pointer_cast( - dev_ctx.GetBlob(key_src_mem)); + src_memory = + std::static_pointer_cast(dev_ctx.GetBlob(key_src_mem)); src_memory->set_data_handle(to_void_cast(input_data)); - dst_memory = std::static_pointer_cast( - dev_ctx.GetBlob(key_dst_mem)); + dst_memory = + std::static_pointer_cast(dev_ctx.GetBlob(key_dst_mem)); if (with_shift) std::fill(output_data, output_data + output->numel(), reorder_shift); dst_memory->set_data_handle(output->mutable_data(ctx.GetPlace())); diff --git a/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc index 815af4eaaf1b37b57b713337c32fdf4deb6888af..c44f22dd02face48fe344ea2ee91ead4e9836837 100644 --- a/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "mkldnn.hpp" +#include "dnnl.hpp" #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/operators/quantize_op.h" #include "paddle/fluid/platform/mkldnn_helper.h" @@ -21,13 +21,13 @@ limitations under the License. */ namespace paddle { namespace operators { -using mkldnn::memory; -using mkldnn::primitive; -using mkldnn::reorder; +using dnnl::memory; +using dnnl::primitive; +using dnnl::reorder; using platform::to_void_cast; using Tensor = framework::Tensor; using framework::DataLayout; -using mkldnn::stream; +using dnnl::stream; using platform::GetMKLDNNFormat; template @@ -65,19 +65,19 @@ class QuantOpKernel : public framework::OpKernel { bool bfloat16 = ctx.Attr("bfloat16"); // TODO(jczaja): Refactor with Acquire API - std::shared_ptr src_memory; - std::shared_ptr dst_memory; + std::shared_ptr src_memory; + std::shared_ptr dst_memory; std::shared_ptr reorder_p; std::string out_layout = ctx.Attr("output_format"); MKLDNNMemoryFormat out_format = platform::data_format_to_memory_format(out_layout); - mkldnn::primitive_attr attri; + dnnl::primitive_attr attri; int mask = 0; attri.set_output_scales(mask, {scale_data}); if (with_shift) { - mkldnn::post_ops post_operations; + dnnl::post_ops post_operations; post_operations.append_sum(); attri.set_post_ops(post_operations); uint8_t* output_data = output->mutable_data(ctx.GetPlace()); @@ -87,10 +87,10 @@ class QuantOpKernel : public framework::OpKernel { auto src_md = platform::MKLDNNMemDesc({src_tz}, memory::data_type::f32, input->format()); - src_memory = std::make_shared(src_md, engine, - to_void_cast(input_data)); + src_memory = std::make_shared(src_md, engine, + to_void_cast(input_data)); - std::shared_ptr dst_md; + std::shared_ptr dst_md; if (bfloat16) { platform::SetDstMemoryQuantized( ctx, output, dst_tz, engine, dst_md, dst_memory, out_format); diff --git a/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc index 6296654b8bdd9d071fd9484e359a3a5943b1a655..8d14de6f7c969b2df38818da69e78ea357fc6b7b 100644 --- a/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "mkldnn.hpp" +#include "dnnl.hpp" #include "paddle/fluid/framework/data_layout_transform.h" #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/operators/requantize_op.h" @@ -93,7 +93,7 @@ class ReQuantOpKernel : public framework::OpKernel { int mask = 0; attri.set_output_scales(mask, {reorder_scale}); if (with_shift) { - mkldnn::post_ops post_operations; + dnnl::post_ops post_operations; post_operations.append_sum(); attri.set_post_ops(post_operations); uint8_t* output_data = output->mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/platform/mkldnn_helper.h b/paddle/fluid/platform/mkldnn_helper.h index 9236521fe1d95f9b97127322bcd4f16ce003a9d2..a387bbc3462ae016ff1b91f6d59641ccd09dc29b 100644 --- a/paddle/fluid/platform/mkldnn_helper.h +++ b/paddle/fluid/platform/mkldnn_helper.h @@ -20,22 +20,22 @@ limitations under the License. */ #include #include #include -#include "mkldnn.hpp" +#include "dnnl.hpp" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/profiler.h" namespace paddle { #ifdef PADDLE_WITH_MKLDNN -using MKLDNNMemoryFormat = mkldnn::memory::format_tag; +using MKLDNNMemoryFormat = dnnl::memory::format_tag; #endif namespace platform { -using MKLDNNStream = mkldnn::stream; -using MKLDNNEngine = mkldnn::engine; -using MKLDNNMemory = mkldnn::memory; -using MKLDNNMemoryDescriptor = mkldnn::memory::desc; -using MKLDNNPrimitive = mkldnn::primitive; -using MKLDNNPrimitiveDesc = mkldnn::handle; +using MKLDNNStream = dnnl::stream; +using MKLDNNEngine = dnnl::engine; +using MKLDNNMemory = dnnl::memory; +using MKLDNNMemoryDescriptor = dnnl::memory::desc; +using MKLDNNPrimitive = dnnl::primitive; +using MKLDNNPrimitiveDesc = dnnl::handle; typedef std::unique_ptr MKLDNNStreamPtr; typedef std::unique_ptr MKLDNNEnginePtr; @@ -62,7 +62,7 @@ using tf_pd = typename Type::primitive_desc; template std::shared_ptr> MKLDNNFwdPrimitiveDesc(const Engine& e, Args&&... args) { - auto desc = tf_desc(mkldnn::prop_kind::forward, (args)...); + auto desc = tf_desc(dnnl::prop_kind::forward, (args)...); auto pd = new tf_pd(desc, e); return std::shared_ptr>(pd); } @@ -129,10 +129,10 @@ struct mkldnn_dummy_primitive { struct desc {}; }; -inline mkldnn::memory::desc MKLDNNMemDesc(const std::vector& dims, - mkldnn::memory::data_type data_type, - MKLDNNMemoryFormat format) { - return mkldnn::memory::desc({dims}, data_type, format); +inline dnnl::memory::desc MKLDNNMemDesc(const std::vector& dims, + dnnl::memory::data_type data_type, + MKLDNNMemoryFormat format) { + return dnnl::memory::desc({dims}, data_type, format); } inline void ClearMKLDNNCache(const platform::Place& place, @@ -159,36 +159,35 @@ inline void DontClearMKLDNNCache(const platform::Place& place) { } template -mkldnn::memory::data_type MKLDNNGetDataType() { - return mkldnn::memory::data_type::undef; +dnnl::memory::data_type MKLDNNGetDataType() { + return dnnl::memory::data_type::undef; } template <> -inline mkldnn::memory::data_type MKLDNNGetDataType() { - return mkldnn::memory::data_type::f32; +inline dnnl::memory::data_type MKLDNNGetDataType() { + return dnnl::memory::data_type::f32; } template <> -inline mkldnn::memory::data_type MKLDNNGetDataType() { - return mkldnn::memory::data_type::s32; +inline dnnl::memory::data_type MKLDNNGetDataType() { + return dnnl::memory::data_type::s32; } template <> -inline mkldnn::memory::data_type MKLDNNGetDataType() { - return mkldnn::memory::data_type::s8; +inline dnnl::memory::data_type MKLDNNGetDataType() { + return dnnl::memory::data_type::s8; } template <> -inline mkldnn::memory::data_type MKLDNNGetDataType() { - return mkldnn::memory::data_type::u8; +inline dnnl::memory::data_type MKLDNNGetDataType() { + return dnnl::memory::data_type::u8; } template <> -inline mkldnn::memory::data_type -MKLDNNGetDataType() { - return mkldnn::memory::data_type::bf16; +inline dnnl::memory::data_type MKLDNNGetDataType() { + return dnnl::memory::data_type::bf16; } -inline void Reorder(mkldnn::memory src, mkldnn::memory dst, - const mkldnn::engine& engine) { - auto reorder_prim = mkldnn::reorder(src, dst); +inline void Reorder(dnnl::memory src, dnnl::memory dst, + const dnnl::engine& engine) { + auto reorder_prim = dnnl::reorder(src, dst); auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); platform::RecordEvent record_reorder("int_reorder", platform::EventRole::kUniqueOp); @@ -196,8 +195,7 @@ inline void Reorder(mkldnn::memory src, mkldnn::memory dst, astream.wait(); } -inline mkldnn::memory::format_tag GetMKLDNNFormat( - mkldnn::memory::desc mem_desc) { +inline dnnl::memory::format_tag GetMKLDNNFormat(dnnl::memory::desc mem_desc) { auto ndims = mem_desc.data.ndims; auto strides = mem_desc.data.format_desc.blocking.strides; auto inner_nblks = mem_desc.data.format_desc.blocking.inner_nblks; @@ -205,62 +203,62 @@ inline mkldnn::memory::format_tag GetMKLDNNFormat( auto inner_idxs = mem_desc.data.format_desc.blocking.inner_idxs; if (ndims == 1) { - return mkldnn::memory::format_tag::x; + return dnnl::memory::format_tag::x; } else if (ndims == 2) { if (inner_nblks == 0) { if (strides[0] >= strides[1]) { - return mkldnn::memory::format_tag::nc; + return dnnl::memory::format_tag::nc; } else { - return mkldnn::memory::format_tag::cn; + return dnnl::memory::format_tag::cn; } } } else if (ndims == 3) { if (inner_nblks == 0) { if (strides[0] >= strides[1] && strides[1] >= strides[2]) { - return mkldnn::memory::format_tag::ncw; + return dnnl::memory::format_tag::ncw; } else if (strides[1] >= strides[0] && strides[0] >= strides[2]) { - return mkldnn::memory::format_tag::ntc; + return dnnl::memory::format_tag::ntc; } else { - return mkldnn::memory::format_tag::nwc; + return dnnl::memory::format_tag::nwc; } } } else if (ndims == 4) { if (inner_nblks == 0) { if (strides[0] >= strides[1] && strides[1] >= strides[2] && strides[2] >= strides[3]) { - return mkldnn::memory::format_tag::nchw; + return dnnl::memory::format_tag::nchw; } else if (strides[2] >= strides[3] && strides[3] >= strides[1] && strides[1] >= strides[0]) { - return mkldnn::memory::format_tag::cdba; + return dnnl::memory::format_tag::cdba; } else { - return mkldnn::memory::format_tag::nhwc; + return dnnl::memory::format_tag::nhwc; } } else if (inner_nblks == 1) { if (inner_blks[0] == 16 && inner_idxs[0] == 1) { - return mkldnn::memory::format_tag::nChw16c; + return dnnl::memory::format_tag::nChw16c; } else if (inner_blks[0] == 8 && inner_idxs[0] == 1) { - return mkldnn::memory::format_tag::nChw8c; + return dnnl::memory::format_tag::nChw8c; } else if (inner_blks[0] == 8 && inner_idxs[0] == 0) { if (strides[0] >= strides[2] && strides[2] >= strides[3] && strides[3] >= strides[1]) { - return mkldnn::memory::format_tag::Acdb8a; + return dnnl::memory::format_tag::Acdb8a; } } else if (inner_blks[0] == 4 && inner_idxs[0] == 1) { - return mkldnn::memory::format_tag::nChw4c; + return dnnl::memory::format_tag::nChw4c; } else if (inner_blks[0] == 16 && inner_idxs[0] == 0) { if (strides[0] >= strides[2] && strides[2] >= strides[3] && strides[3] >= strides[1]) { - return mkldnn::memory::format_tag::Acdb16a; + return dnnl::memory::format_tag::Acdb16a; } } } else if (inner_nblks == 2) { if (inner_blks[0] == 16 && inner_blks[1] == 16) { if (inner_idxs[0] == 1 && inner_idxs[1] == 0) { - return mkldnn::memory::format_tag::OIhw16i16o; + return dnnl::memory::format_tag::OIhw16i16o; } } else if (inner_blks[0] == 8 && inner_blks[1] == 8) { if (inner_idxs[0] == 1 && inner_idxs[1] == 0) { - return mkldnn::memory::format_tag::OIhw8i8o; + return dnnl::memory::format_tag::OIhw8i8o; } } } @@ -268,38 +266,38 @@ inline mkldnn::memory::format_tag GetMKLDNNFormat( if (inner_nblks == 0) { if (strides[0] >= strides[1] && strides[1] >= strides[2] && strides[2] >= strides[3] && strides[3] >= strides[4]) { - return mkldnn::memory::format_tag::ncdhw; + return dnnl::memory::format_tag::ncdhw; } else { - return mkldnn::memory::format_tag::ndhwc; + return dnnl::memory::format_tag::ndhwc; } } else if (inner_nblks == 1) { if (inner_blks[0] == 8 && inner_idxs[0] == 0) { if (strides[0] >= strides[2] && strides[2] >= strides[3] && strides[3] >= strides[4] && strides[4] >= strides[1]) { - return mkldnn::memory::format_tag::Acdeb8a; + return dnnl::memory::format_tag::Acdeb8a; } if (strides[0] >= strides[1] && strides[1] >= strides[2] && strides[2] >= strides[3] && strides[3] >= strides[4]) { - return mkldnn::memory::format_tag::Abcde8a; + return dnnl::memory::format_tag::Abcde8a; } } else if (inner_blks[0] == 8 && inner_idxs[0] == 1) { if (strides[0] >= strides[1] && strides[1] >= strides[2] && strides[2] >= strides[3] && strides[3] >= strides[4]) { - return mkldnn::memory::format_tag::aBcde8b; + return dnnl::memory::format_tag::aBcde8b; } } else if (inner_blks[0] == 16 && inner_idxs[0] == 0) { if (strides[0] >= strides[2] && strides[2] >= strides[3] && strides[3] >= strides[4] && strides[4] >= strides[1]) { - return mkldnn::memory::format_tag::Acdeb16a; + return dnnl::memory::format_tag::Acdeb16a; } if (strides[0] >= strides[1] && strides[1] >= strides[2] && strides[2] >= strides[3] && strides[3] >= strides[4]) { - return mkldnn::memory::format_tag::Abcde16a; + return dnnl::memory::format_tag::Abcde16a; } } else if (inner_blks[0] == 16 && inner_idxs[0] == 1) { if (strides[0] >= strides[1] && strides[1] >= strides[2] && strides[2] >= strides[3] && strides[3] >= strides[4]) { - return mkldnn::memory::format_tag::aBcde16b; + return dnnl::memory::format_tag::aBcde16b; } } } @@ -308,7 +306,7 @@ inline mkldnn::memory::format_tag GetMKLDNNFormat( if (strides[0] >= strides[1] && strides[1] >= strides[2] && strides[2] >= strides[3] && strides[3] >= strides[4] && strides[4] >= strides[5]) { - return mkldnn::memory::format_tag::abcdef; + return dnnl::memory::format_tag::abcdef; } } } @@ -325,10 +323,10 @@ inline mkldnn::memory::format_tag GetMKLDNNFormat( // for (int i=0;i inline void AppendKey(std::string* key, - const mkldnn::memory::format_tag& format) { + const dnnl::memory::format_tag& format) { key->append(std::to_string(static_cast(format))); } template <> inline void AppendKey(std::string* key, - const mkldnn::memory::data_type& data_type) { + const dnnl::memory::data_type& data_type) { key->append(std::to_string(static_cast(data_type))); } template <> -inline void AppendKey(std::string* key, const mkldnn::algorithm& algorithm) { +inline void AppendKey(std::string* key, const dnnl::algorithm& algorithm) { key->append(std::to_string(static_cast(algorithm))); } template <> inline void AppendKey(std::string* key, - const mkldnn::normalization_flags& flags) { + const dnnl::normalization_flags& flags) { key->append(std::to_string(static_cast(flags))); } diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h index 2bb08bcf81b6c1ce0c9ba8661f29d80365390e80..c16137b50dbf713e94cf65a50e610849a74b1416 100644 --- a/paddle/fluid/platform/mkldnn_reuse.h +++ b/paddle/fluid/platform/mkldnn_reuse.h @@ -33,14 +33,14 @@ namespace platform { using framework::DataLayout; using framework::Tensor; using user_function = std::function(const float*)>; -using memory = mkldnn::memory; +using memory = dnnl::memory; template class MKLDNNHandlerNoCachingT { public: - MKLDNNHandlerNoCachingT(mkldnn::engine engine, platform::Place cpu_place) + MKLDNNHandlerNoCachingT(dnnl::engine engine, platform::Place cpu_place) : engine_(engine), place_(cpu_place), fwd_pd_(nullptr), bwd_pd_(nullptr) { platform::MKLDNNDeviceContext::tls().log_lib_version(); } @@ -60,7 +60,7 @@ class MKLDNNHandlerNoCachingT { return std::make_shared(*bwd_w_pd_); } - std::shared_ptr AcquireSrcMemory( + std::shared_ptr AcquireSrcMemory( const framework::Tensor* input) { const T* input_data = input->data(); return this->AcquireMemoryFromPrimitive(fwd_pd_->src_desc(), @@ -68,33 +68,33 @@ class MKLDNNHandlerNoCachingT { } template - std::shared_ptr AcquireDstMemory(framework::Tensor* output) { + std::shared_ptr AcquireDstMemory(framework::Tensor* output) { T_out* ptr = output->mutable_data(place_, fwd_pd_->dst_desc().get_size()); return this->AcquireMemoryFromPrimitive(fwd_pd_->dst_desc(), ptr); } template - std::shared_ptr AcquireDstMemory(void) { + std::shared_ptr AcquireDstMemory(void) { return this->AcquireMemoryFromPrimitive(fwd_pd_->dst_desc()); } template - std::shared_ptr AcquireDstMemory( + std::shared_ptr AcquireDstMemory( const framework::Tensor* output) { const T_out* output_data = output->data(); return this->AcquireMemoryFromPrimitive(bwd_pd_->dst_desc(), to_void_cast(output_data)); } - std::shared_ptr AcquireDiffDstMemory( + std::shared_ptr AcquireDiffDstMemory( const framework::Tensor* diffdst) { const T* ptr = diffdst->data(); return this->AcquireMemoryFromPrimitive(bwd_pd_->diff_dst_desc(), to_void_cast(ptr)); } - std::shared_ptr AcquireDiffSrcMemory( + std::shared_ptr AcquireDiffSrcMemory( framework::Tensor* diffsrc) { T* ptr = diffsrc->mutable_data(place_, bwd_pd_->diff_src_desc().get_size()); @@ -102,7 +102,7 @@ class MKLDNNHandlerNoCachingT { } // Buffer of given Tensor is used for oneDNN computation - std::shared_ptr AcquireDiffWeightsMemory( + std::shared_ptr AcquireDiffWeightsMemory( framework::Tensor* diff_weights) { PADDLE_ENFORCE_NOT_NULL( bwd_w_pd_, @@ -115,7 +115,7 @@ class MKLDNNHandlerNoCachingT { } // Buffer is allocated by oneDNN to store computation results - std::shared_ptr AcquireDiffWeightsMemory(void) { + std::shared_ptr AcquireDiffWeightsMemory(void) { PADDLE_ENFORCE_NOT_NULL( bwd_w_pd_, platform::errors::Unavailable( @@ -179,37 +179,36 @@ class MKLDNNHandlerNoCachingT { bwd_desc, engine_, *fwd_pd_); } - std::shared_ptr AcquireMemoryFromPrimitive( - mkldnn::memory::desc md, void* ptr) { - return std::make_shared(md, engine_, ptr); + std::shared_ptr AcquireMemoryFromPrimitive( + dnnl::memory::desc md, void* ptr) { + return std::make_shared(md, engine_, ptr); } - std::shared_ptr AcquireMemoryFromPrimitive( - mkldnn::memory::desc md) { - return std::make_shared(md, engine_); + std::shared_ptr AcquireMemoryFromPrimitive( + dnnl::memory::desc md) { + return std::make_shared(md, engine_); } - void AcquireReorder(const std::shared_ptr& user_memory_p, - const std::shared_ptr& target_memory_p) { + void AcquireReorder(const std::shared_ptr& user_memory_p, + const std::shared_ptr& target_memory_p) { auto reorder_p = - std::make_shared(*user_memory_p, *target_memory_p); + std::make_shared(*user_memory_p, *target_memory_p); auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); platform::RecordEvent record_reorder("int_reorder", platform::EventRole::kUniqueOp); - reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p}, - {MKLDNN_ARG_TO, *target_memory_p}}); + reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p}, + {DNNL_ARG_TO, *target_memory_p}}); astream.wait(); } template - std::shared_ptr AcquireMemoryWithReorder( - const mkldnn::memory::desc& user_md, - const mkldnn::memory::desc& target_md, void* ptr, - bool is_persistent = false, + std::shared_ptr AcquireMemoryWithReorder( + const dnnl::memory::desc& user_md, const dnnl::memory::desc& target_md, + void* ptr, bool is_persistent = false, std::function(const F*)> custom_reorder_func = {}) { - std::shared_ptr target_memory_p; + std::shared_ptr target_memory_p; if (custom_reorder_func) { auto reordered_data = custom_reorder_func(reinterpret_cast(ptr)); @@ -217,15 +216,15 @@ class MKLDNNHandlerNoCachingT { } auto user_memory_p = std::make_shared(user_md, engine_, ptr); if (user_md != target_md) { - target_memory_p = std::make_shared(target_md, engine_); + target_memory_p = std::make_shared(target_md, engine_); auto reorder_p = std::make_shared(*user_memory_p, *target_memory_p); auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); platform::RecordEvent record_reorder("int_reorder", platform::EventRole::kUniqueOp); - reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p}, - {MKLDNN_ARG_TO, *target_memory_p}}); + reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p}, + {DNNL_ARG_TO, *target_memory_p}}); astream.wait(); } else { target_memory_p = user_memory_p; @@ -233,7 +232,7 @@ class MKLDNNHandlerNoCachingT { return target_memory_p; } - mkldnn::engine engine_; + dnnl::engine engine_; platform::Place place_; std::shared_ptr fwd_pd_; std::shared_ptr bwd_pd_; @@ -245,7 +244,7 @@ template class MKLDNNHandlerT { public: - MKLDNNHandlerT(const MKLDNNDeviceContext& dev_ctx, mkldnn::engine engine, + MKLDNNHandlerT(const MKLDNNDeviceContext& dev_ctx, dnnl::engine engine, platform::Place cpu_place, const std::string& base_key) : dev_ctx_(dev_ctx), engine_(engine), @@ -294,7 +293,7 @@ class MKLDNNHandlerT { return backward_p; } - std::shared_ptr AcquireSrcMemory( + std::shared_ptr AcquireSrcMemory( const framework::Tensor* input) { const T* input_data = input->data(); return this->AcquireMemoryFromPrimitive( @@ -302,7 +301,7 @@ class MKLDNNHandlerT { } template - std::shared_ptr AcquireDstMemory(framework::Tensor* output) { + std::shared_ptr AcquireDstMemory(framework::Tensor* output) { T_out* ptr = output->mutable_data(place_, fwd_pd_->dst_desc().get_size()); return this->AcquireMemoryFromPrimitive(fwd_pd_->dst_desc(), ptr, @@ -310,12 +309,12 @@ class MKLDNNHandlerT { } template - std::shared_ptr AcquireDstMemory(void) { + std::shared_ptr AcquireDstMemory(void) { return this->AcquireMemoryFromPrimitive(fwd_pd_->dst_desc(), "@dstt_mem_p"); } template - std::shared_ptr AcquireDstMemory( + std::shared_ptr AcquireDstMemory( const framework::Tensor* output) { const T_out* output_data = output->data(); return this->AcquireMemoryFromPrimitive(bwd_pd_->dst_desc(), @@ -323,14 +322,14 @@ class MKLDNNHandlerT { "@bwd-dst_mem_p"); } - std::shared_ptr AcquireDiffDstMemory( + std::shared_ptr AcquireDiffDstMemory( const framework::Tensor* diffdst) { const T* ptr = diffdst->data(); return this->AcquireMemoryFromPrimitive( bwd_pd_->diff_dst_desc(), to_void_cast(ptr), "@diff_dst_mem_p"); } - std::shared_ptr AcquireDiffSrcMemory( + std::shared_ptr AcquireDiffSrcMemory( framework::Tensor* diffsrc) { T* ptr = diffsrc->mutable_data(place_, bwd_pd_->diff_src_desc().get_size()); @@ -339,7 +338,7 @@ class MKLDNNHandlerT { } // Buffer of given Tensor is used for oneDNN computation - std::shared_ptr AcquireDiffWeightsMemory( + std::shared_ptr AcquireDiffWeightsMemory( framework::Tensor* diff_weights) { PADDLE_ENFORCE_NOT_NULL( bwd_w_pd_, @@ -352,7 +351,7 @@ class MKLDNNHandlerT { } // Buffer is allocated by oneDNN to store computation results - std::shared_ptr AcquireDiffWeightsMemory(void) { + std::shared_ptr AcquireDiffWeightsMemory(void) { PADDLE_ENFORCE_NOT_NULL( bwd_w_pd_, platform::errors::Unavailable( @@ -467,19 +466,19 @@ class MKLDNNHandlerT { } } - std::shared_ptr AcquireMemoryFromPrimitive( + std::shared_ptr AcquireMemoryFromPrimitive( const std::string& suffix) { - return std::static_pointer_cast( + return std::static_pointer_cast( dev_ctx_.GetBlob(key_ + suffix)); } - std::shared_ptr AcquireMemoryFromPrimitive( - mkldnn::memory::desc md, void* ptr, const std::string& suffix) { + std::shared_ptr AcquireMemoryFromPrimitive( + dnnl::memory::desc md, void* ptr, const std::string& suffix) { const auto local_key = key_ + suffix; auto mem_p = - std::static_pointer_cast(dev_ctx_.GetBlob(local_key)); + std::static_pointer_cast(dev_ctx_.GetBlob(local_key)); if (mem_p == nullptr) { - mem_p = std::make_shared(md, engine_, ptr); + mem_p = std::make_shared(md, engine_, ptr); dev_ctx_.SetBlob(local_key, mem_p); } else { mem_p->set_data_handle(ptr); @@ -487,37 +486,36 @@ class MKLDNNHandlerT { return mem_p; } - std::shared_ptr AcquireMemoryFromPrimitive( - mkldnn::memory::desc md, const std::string& suffix) { + std::shared_ptr AcquireMemoryFromPrimitive( + dnnl::memory::desc md, const std::string& suffix) { const auto local_key = key_ + suffix; auto mem_p = - std::static_pointer_cast(dev_ctx_.GetBlob(local_key)); + std::static_pointer_cast(dev_ctx_.GetBlob(local_key)); if (mem_p == nullptr) { - mem_p = std::make_shared(md, engine_); + mem_p = std::make_shared(md, engine_); dev_ctx_.SetBlob(local_key, mem_p); } return mem_p; } - void AcquireReorder(const std::shared_ptr& user_memory_p, - const std::shared_ptr& target_memory_p) { + void AcquireReorder(const std::shared_ptr& user_memory_p, + const std::shared_ptr& target_memory_p) { auto reorder_p = - std::make_shared(*user_memory_p, *target_memory_p); + std::make_shared(*user_memory_p, *target_memory_p); auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); platform::RecordEvent record_reorder("int_reorder", platform::EventRole::kUniqueOp); - reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p}, - {MKLDNN_ARG_TO, *target_memory_p}}); + reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p}, + {DNNL_ARG_TO, *target_memory_p}}); astream.wait(); } template - std::shared_ptr AcquireMemoryWithReorder( - const mkldnn::memory::desc& user_md, - const mkldnn::memory::desc& target_md, void* ptr, - const std::string& suffix, bool is_persistent = false, + std::shared_ptr AcquireMemoryWithReorder( + const dnnl::memory::desc& user_md, const dnnl::memory::desc& target_md, + void* ptr, const std::string& suffix, bool is_persistent = false, std::function(const F*)> custom_reorder_func = {}, const std::vector& scale_data = {1.0f}, int mask = 0) { const auto target_key = key_ + suffix + "_target"; @@ -537,7 +535,7 @@ class MKLDNNHandlerT { auto user_memory_p = std::make_shared(user_md, engine_, ptr); if (user_md != target_md) { - target_memory_p = std::make_shared(target_md, engine_); + target_memory_p = std::make_shared(target_md, engine_); dnnl::reorder::primitive_desc reorder_pdesc; if (is_int8()) { dnnl::primitive_attr attr; @@ -554,8 +552,8 @@ class MKLDNNHandlerT { auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); platform::RecordEvent record_reorder("int_reorder", platform::EventRole::kUniqueOp); - reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p}, - {MKLDNN_ARG_TO, *target_memory_p}}); + reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p}, + {DNNL_ARG_TO, *target_memory_p}}); astream.wait(); } else { target_memory_p = user_memory_p; @@ -571,27 +569,26 @@ class MKLDNNHandlerT { // TODO(jczaja): Here we detect if reorder is cached it means it is needed // need to change this to get rid of keys - auto reorder_p = std::static_pointer_cast( + auto reorder_p = std::static_pointer_cast( dev_ctx_.GetBlob(key_reorder_p)); if (reorder_p != nullptr) { platform::RecordEvent record_reorder("int_reorder", platform::EventRole::kUniqueOp); - reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p}, - {MKLDNN_ARG_TO, *target_memory_p}}); + reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p}, + {DNNL_ARG_TO, *target_memory_p}}); astream.wait(); } } return target_memory_p; } - std::shared_ptr AcquireMemory(const std::string& suffix) { + std::shared_ptr AcquireMemory(const std::string& suffix) { const auto local_key = key_ + suffix; - return std::static_pointer_cast( - dev_ctx_.GetBlob(local_key)); + return std::static_pointer_cast(dev_ctx_.GetBlob(local_key)); } const MKLDNNDeviceContext& dev_ctx_; - mkldnn::engine engine_; + dnnl::engine engine_; platform::Place place_; std::string key_common_; std::string key_; @@ -605,7 +602,7 @@ class BinaryMKLDNNHandler : public platform::MKLDNNHandlerNoCachingT { public: BinaryMKLDNNHandler(const dnnl::algorithm algo, const int axis, - const mkldnn::engine engine, platform::Place cpu_place, + const dnnl::engine engine, platform::Place cpu_place, const Tensor* x, const Tensor* y, Tensor* z, float scale_x, float scale_y, float scale_z, const dnnl::post_ops& post_ops = dnnl::post_ops()) @@ -662,7 +659,7 @@ class BinaryMKLDNNHandler this->AcquireForwardPrimitiveDescriptor(attributes, algo, src0_md, src1_md, dst_md); } - std::shared_ptr AcquireSecondSrcMemory( + std::shared_ptr AcquireSecondSrcMemory( const framework::Tensor* input) { const T* input_data = input->data(); return this->AcquireMemoryFromPrimitive(this->fwd_pd_->src1_desc(), @@ -707,7 +704,7 @@ class BroadcastDataMKLDNNHandler : public platform::MKLDNNHandlerNoCachingT { public: BroadcastDataMKLDNNHandler(const dnnl::algorithm algo, - const mkldnn::engine engine, + const dnnl::engine engine, platform::Place cpu_place, const Tensor* out, const Tensor* x, float scale_x, float scale_y, const std::vector& input_dims) @@ -735,7 +732,7 @@ class BroadcastDataMKLDNNHandler } template - std::shared_ptr AcquireDstMemory(framework::Tensor* output) { + std::shared_ptr AcquireDstMemory(framework::Tensor* output) { T_out* ptr = output->mutable_data( this->place_, this->fwd_pd_->dst_desc().get_size()); memset(ptr, 0, this->fwd_pd_->dst_desc().get_size()); @@ -748,7 +745,7 @@ class ReductionMKLDNNHandler : public platform::MKLDNNHandlerNoCachingT { public: ReductionMKLDNNHandler(const dnnl::algorithm algo, const float p, - const float eps, const mkldnn::engine engine, + const float eps, const dnnl::engine engine, platform::Place cpu_place, const Tensor* x, const Tensor* y, std::vector y_tz, const dnnl::primitive_attr& attr = NULL) @@ -777,16 +774,16 @@ class ReductionMKLDNNHandler template class ActivationMKLDNNHandler - : public MKLDNNHandlerNoCachingT { + : public MKLDNNHandlerNoCachingT { public: - ActivationMKLDNNHandler(mkldnn::algorithm algorithm, + ActivationMKLDNNHandler(dnnl::algorithm algorithm, const framework::ExecutionContext& ctx, - const mkldnn::engine engine, Place cpu_place, + const dnnl::engine engine, Place cpu_place, const framework::Tensor* in_x) - : platform::MKLDNNHandlerNoCachingT(engine, - cpu_place) { + : platform::MKLDNNHandlerNoCachingT(engine, + cpu_place) { float alpha = ctx.HasAttr("alpha") ? ctx.Attr("alpha") : 0; float beta = ctx.HasAttr("beta") ? ctx.Attr("beta") : 0; @@ -811,7 +808,7 @@ class ActivationMKLDNNHandler : ctx.Attr("max"); } else { // paddle uses beta but mkldnn uses alpha for swish - if (algorithm == mkldnn::algorithm::eltwise_swish) { + if (algorithm == dnnl::algorithm::eltwise_swish) { std::swap(alpha, beta); } else if (algorithm == dnnl::algorithm::eltwise_bounded_relu) { alpha = ctx.Attr("threshold"); @@ -827,24 +824,24 @@ class ActivationMKLDNNHandler auto src_tz = framework::vectorize(in_x->dims()); auto src_fmt = src_tz.size() == 2 ? MKLDNNMemoryFormat::nc : in_x->format(); auto md = - mkldnn::memory::desc(src_tz, platform::MKLDNNGetDataType(), src_fmt); + dnnl::memory::desc(src_tz, platform::MKLDNNGetDataType(), src_fmt); - this->AcquireForwardPrimitiveDescriptor(mkldnn::prop_kind::forward_training, + this->AcquireForwardPrimitiveDescriptor(dnnl::prop_kind::forward_training, algorithm, md, alpha, beta); } - ActivationMKLDNNHandler(mkldnn::algorithm algorithm, + ActivationMKLDNNHandler(dnnl::algorithm algorithm, const framework::ExecutionContext& ctx, - const mkldnn::engine engine, Place cpu_place, + const dnnl::engine engine, Place cpu_place, const framework::Tensor* in_x, const Tensor* out_grad) - : platform::MKLDNNHandlerNoCachingT(engine, - cpu_place) { + : platform::MKLDNNHandlerNoCachingT(engine, + cpu_place) { float alpha = ctx.HasAttr("alpha") ? ctx.Attr("alpha") : 0; float beta = ctx.HasAttr("beta") ? ctx.Attr("beta") : 0; // paddle uses beta but mkldnn uses alpha for swish - if (algorithm == mkldnn::algorithm::eltwise_swish) { + if (algorithm == dnnl::algorithm::eltwise_swish) { std::swap(alpha, beta); } else if (algorithm == dnnl::algorithm::eltwise_bounded_relu) { alpha = ctx.Attr("threshold"); @@ -870,13 +867,13 @@ class ActivationMKLDNNHandler auto src_md = platform::MKLDNNMemDesc( dims, platform::MKLDNNGetDataType(), src_fmt); - this->AcquireForwardPrimitiveDescriptor(mkldnn::prop_kind::forward_training, + this->AcquireForwardPrimitiveDescriptor(dnnl::prop_kind::forward_training, algorithm, src_md, alpha, beta); this->AcquireBackwardPrimitiveDescriptor(algorithm, diff_dst_md, src_md, alpha, beta); } - std::shared_ptr AcquireBackwardSrcMemory( + std::shared_ptr AcquireBackwardSrcMemory( const framework::Tensor* input) { const T* input_data = input->data(); return this->AcquireMemoryFromPrimitive(this->bwd_pd_->src_desc(), @@ -888,7 +885,7 @@ class ReorderMKLDNNHandler { public: ReorderMKLDNNHandler(std::vector& dims, // NOLINT framework::proto::VarType::Type vtype, - mkldnn::memory::data_type dtype, mkldnn::engine engine) + dnnl::memory::data_type dtype, dnnl::engine engine) : dims_(dims), vtype_(vtype), vtype_dst_(vtype), @@ -898,10 +895,9 @@ class ReorderMKLDNNHandler { ReorderMKLDNNHandler(std::vector& dims, // NOLINT framework::proto::VarType::Type vtype, - mkldnn::memory::data_type dtype, + dnnl::memory::data_type dtype, framework::proto::VarType::Type vtype_dst, - mkldnn::memory::data_type dtype_dst, - mkldnn::engine engine) + dnnl::memory::data_type dtype_dst, dnnl::engine engine) : dims_(dims), vtype_(vtype), vtype_dst_(vtype_dst), @@ -909,56 +905,56 @@ class ReorderMKLDNNHandler { dtype_dst_(dtype_dst), engine_(engine) {} - std::shared_ptr AcquireSrcMemory( - const MKLDNNMemoryFormat& fmt, void* ptr) { - auto md = mkldnn::memory::desc(dims_, dtype_, fmt); - return std::make_shared(md, engine_, ptr); + std::shared_ptr AcquireSrcMemory(const MKLDNNMemoryFormat& fmt, + void* ptr) { + auto md = dnnl::memory::desc(dims_, dtype_, fmt); + return std::make_shared(md, engine_, ptr); } - std::shared_ptr AcquireSubmemory( + std::shared_ptr AcquireSubmemory( const std::vector& dims, const std::vector& offset, - const std::shared_ptr& mem_p) { + const std::shared_ptr& mem_p) { auto sub_md = mem_p->get_desc().submemory_desc(dims, {offset}); - auto sub_mem_p = std::make_shared(sub_md, engine_, - mem_p->get_data_handle()); + auto sub_mem_p = std::make_shared(sub_md, engine_, + mem_p->get_data_handle()); return sub_mem_p; } - std::shared_ptr AcquireDstMemory( - framework::Tensor* output, const MKLDNNMemoryFormat& fmt, - platform::Place place) { + std::shared_ptr AcquireDstMemory(framework::Tensor* output, + const MKLDNNMemoryFormat& fmt, + platform::Place place) { auto dst_md = platform::MKLDNNMemDesc(dims_, dtype_dst_, fmt); auto dst_data = output->mutable_data(place, vtype_dst_, dst_md.get_size()); - return std::make_shared(dst_md, engine_, dst_data); + return std::make_shared(dst_md, engine_, dst_data); } - std::shared_ptr AcquireDstMemory( + std::shared_ptr AcquireDstMemory( framework::Tensor* output, const std::vector& dims, const MKLDNNMemoryFormat& fmt, platform::Place place) { auto dst_md = platform::MKLDNNMemDesc(dims, dtype_dst_, fmt); auto dst_data = output->mutable_data(place, vtype_dst_, dst_md.get_size()); - return std::make_shared(dst_md, engine_, dst_data); + return std::make_shared(dst_md, engine_, dst_data); } - std::shared_ptr AcquireReorder( - std::shared_ptr dst_memory_p, - std::shared_ptr src_memory_p) { - return std::make_shared(*(src_memory_p), *(dst_memory_p)); + std::shared_ptr AcquireReorder( + std::shared_ptr dst_memory_p, + std::shared_ptr src_memory_p) { + return std::make_shared(*(src_memory_p), *(dst_memory_p)); } private: std::vector dims_; framework::proto::VarType::Type vtype_, vtype_dst_; - mkldnn::memory::data_type dtype_, dtype_dst_; - mkldnn::engine engine_; + dnnl::memory::data_type dtype_, dtype_dst_; + dnnl::engine engine_; }; template static void SetDstMemoryQuantized( const framework::ExecutionContext& ctx, framework::Tensor* output, - std::vector dst_tz, const mkldnn::engine& engine, - std::shared_ptr& dst_md, // NOLINT - std::shared_ptr& dst_memory, // NOLINT + std::vector dst_tz, const dnnl::engine& engine, + std::shared_ptr& dst_md, // NOLINT + std::shared_ptr& dst_memory, // NOLINT MKLDNNMemoryFormat output_format) { T* output_data = output->mutable_data(ctx.GetPlace()); const size_t dst_dims = dst_tz.size(); @@ -974,9 +970,9 @@ static void SetDstMemoryQuantized( {dst_tz}, paddle::framework::ToMKLDNNDataType( framework::DataTypeTrait::DataType()), dst_fmt); - dst_md.reset(new mkldnn::memory::desc(tmp_dst_md)); + dst_md.reset(new dnnl::memory::desc(tmp_dst_md)); dst_memory.reset( - new mkldnn::memory(*dst_md, engine, to_void_cast(output_data))); + new dnnl::memory(*dst_md, engine, to_void_cast(output_data))); } } // namespace platform