Unverified commit 7db7a0ec, authored by piotrekobiIntel, committed by GitHub

Changed second batch of deprecated mkldnn header and function names to new oneDNN names (#37351)

* Add second batch of deprecated mkldnn namespace and macro changes

* Unlock CI

* Fix temporary namespace alias placing
Parent 10d8d6b6
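The change is a mechanical rename: every deprecated `mkldnn` spelling (the deprecated header, the `mkldnn::` namespace, and the `MKLDNN_ARG_*` execute-argument macros) is replaced by its oneDNN `dnnl` equivalent. A minimal standalone sketch of that mapping (illustration only, not code from this patch; it assumes only the public oneDNN C++ header is available):

    // Illustration of the rename pattern applied throughout the diff below.
    #include "dnnl.hpp"  // replaces the deprecated "mkldnn.hpp"

    dnnl::memory::desc make_nchw_f32_desc(const dnnl::memory::dims& dims) {
      // was: mkldnn::memory::desc(dims, mkldnn::memory::data_type::f32,
      //                           mkldnn::memory::format_tag::nchw);
      return dnnl::memory::desc(dims, dnnl::memory::data_type::f32,
                                dnnl::memory::format_tag::nchw);
    }

    int main() {
      auto md = make_nchw_f32_desc({1, 3, 224, 224});
      // Execute-time argument macros follow the same pattern:
      // MKLDNN_ARG_SRC -> DNNL_ARG_SRC, MKLDNN_ARG_DIFF_DST -> DNNL_ARG_DIFF_DST, ...
      return md.get_size() > 0 ? 0 : 1;
    }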
@@ -90,9 +90,9 @@ class Tensor {
 #ifdef PADDLE_WITH_MKLDNN
  public:
-  inline mkldnn::memory::format_tag format() const { return format_; }
-  inline void set_format(const mkldnn::memory::format_tag format) {
+  inline dnnl::memory::format_tag format() const { return format_; }
+  inline void set_format(const dnnl::memory::format_tag format) {
     format_ = format;
   }
@@ -106,7 +106,7 @@ class Tensor {
    * this field.
    */
-  mkldnn::memory::format_tag format_ = mkldnn::memory::format_tag::undef;
+  dnnl::memory::format_tag format_ = dnnl::memory::format_tag::undef;
 #endif
  public:
...
@@ -30,9 +30,9 @@ namespace operators {
 using framework::DataLayout;
 using framework::Tensor;
-using mkldnn::memory;
-using mkldnn::primitive;
-using mkldnn::stream;
+using dnnl::memory;
+using dnnl::primitive;
+using dnnl::stream;
 using platform::GetMKLDNNFormat;
 using platform::MKLDNNDeviceContext;
 using platform::to_void_cast;
@@ -75,7 +75,7 @@ class MKLDNNActivationGradKernel
 template <typename T>
 void eltwise_forward(const framework::ExecutionContext &ctx,
-                     mkldnn::algorithm algorithm) {
+                     dnnl::algorithm algorithm) {
   PADDLE_ENFORCE_EQ(platform::is_cpu_place(ctx.GetPlace()), true,
                     paddle::platform::errors::PreconditionNotMet(
                         "Operator DNNL eletwise_forward must use CPUPlace"));
@@ -101,8 +101,8 @@ void eltwise_forward(const framework::ExecutionContext &ctx,
   auto activation_p = handler.AcquireForwardPrimitive();
   auto &astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
-  activation_p->execute(astream, {{MKLDNN_ARG_FROM, *src_memory_p},
-                                  {MKLDNN_ARG_TO, *dst_memory_p}});
+  activation_p->execute(
+      astream, {{DNNL_ARG_FROM, *src_memory_p}, {DNNL_ARG_TO, *dst_memory_p}});
   astream.wait();
   y->set_layout(DataLayout::kMKLDNN);
@@ -111,7 +111,7 @@ void eltwise_forward(const framework::ExecutionContext &ctx,
 template <typename T>
 void eltwise_grad(const framework::ExecutionContext &ctx,
-                  mkldnn::algorithm algorithm) {
+                  dnnl::algorithm algorithm) {
   auto &dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
   const auto &mkldnn_engine = dev_ctx.GetEngine();
@@ -129,23 +129,23 @@ void eltwise_grad(const framework::ExecutionContext &ctx,
   auto &astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
   activation_backward_p->execute(astream,
-                                 {{MKLDNN_ARG_SRC, *src_memory_p},
-                                  {MKLDNN_ARG_DIFF_DST, *diff_dst_memory_p},
-                                  {MKLDNN_ARG_DIFF_SRC, *diff_src_memory_p}});
+                                 {{DNNL_ARG_SRC, *src_memory_p},
+                                  {DNNL_ARG_DIFF_DST, *diff_dst_memory_p},
+                                  {DNNL_ARG_DIFF_SRC, *diff_src_memory_p}});
   astream.wait();
   diff_x->set_layout(DataLayout::kMKLDNN);
   diff_x->set_format(GetMKLDNNFormat(*diff_src_memory_p));
 }
-template <typename T, mkldnn::algorithm algorithm>
+template <typename T, dnnl::algorithm algorithm>
 struct MKLDNNActivationFunc : public BaseActivationFunctor<T> {
   void operator()(const framework::ExecutionContext &ctx) const {
     eltwise_forward<T>(ctx, algorithm);
   }
 };
-template <typename T, mkldnn::algorithm algorithm>
+template <typename T, dnnl::algorithm algorithm>
 struct MKLDNNActivationGradFunc : public BaseActivationFunctor<T> {
   void operator()(const framework::ExecutionContext &ctx) const {
     eltwise_grad<T>(ctx, algorithm);
@@ -157,9 +157,9 @@ struct GeluMKLDNNFunctor : public BaseActivationFunctor<T> {
   void operator()(const framework::ExecutionContext &ctx) const {
     const bool approximate = ctx.Attr<bool>("approximate");
     if (approximate) {
-      eltwise_forward<T>(ctx, mkldnn::algorithm::eltwise_gelu_tanh);
+      eltwise_forward<T>(ctx, dnnl::algorithm::eltwise_gelu_tanh);
     } else {
-      eltwise_forward<T>(ctx, mkldnn::algorithm::eltwise_gelu_erf);
+      eltwise_forward<T>(ctx, dnnl::algorithm::eltwise_gelu_erf);
     }
   }
 };
@@ -169,9 +169,9 @@ struct GeluMKLDNNGradFunctor : public BaseActivationFunctor<T> {
   void operator()(const framework::ExecutionContext &ctx) const {
     const bool approximate = ctx.Attr<bool>("approximate");
     if (approximate) {
-      eltwise_grad<T>(ctx, mkldnn::algorithm::eltwise_gelu_tanh);
+      eltwise_grad<T>(ctx, dnnl::algorithm::eltwise_gelu_tanh);
     } else {
-      eltwise_grad<T>(ctx, mkldnn::algorithm::eltwise_gelu_erf);
+      eltwise_grad<T>(ctx, dnnl::algorithm::eltwise_gelu_erf);
     }
   }
 };
@@ -185,75 +185,73 @@ struct SoftplusMKLDNNFunctor : public BaseActivationFunctor<T> {
 template <typename T>
 using ReluMKLDNNFunctor =
-    MKLDNNActivationFunc<T, mkldnn::algorithm::eltwise_relu>;
+    MKLDNNActivationFunc<T, dnnl::algorithm::eltwise_relu>;
 template <typename T>
 using Relu6MKLDNNFunctor =
-    MKLDNNActivationFunc<T, mkldnn::algorithm::eltwise_bounded_relu>;
+    MKLDNNActivationFunc<T, dnnl::algorithm::eltwise_bounded_relu>;
 template <typename T>
 using SwishMKLDNNFunctor =
-    MKLDNNActivationFunc<T, mkldnn::algorithm::eltwise_swish>;
+    MKLDNNActivationFunc<T, dnnl::algorithm::eltwise_swish>;
 template <typename T>
 using HardSwishMKLDNNFunctor =
-    MKLDNNActivationFunc<T, mkldnn::algorithm::eltwise_hardswish>;
+    MKLDNNActivationFunc<T, dnnl::algorithm::eltwise_hardswish>;
 template <typename T>
 using SigmoidMKLDNNFunctor =
-    MKLDNNActivationFunc<T, mkldnn::algorithm::eltwise_logistic>;
+    MKLDNNActivationFunc<T, dnnl::algorithm::eltwise_logistic>;
 template <typename T>
 using TanhMKLDNNFunctor =
-    MKLDNNActivationFunc<T, mkldnn::algorithm::eltwise_tanh>;
+    MKLDNNActivationFunc<T, dnnl::algorithm::eltwise_tanh>;
 template <typename T>
 using SqrtMKLDNNFunctor =
-    MKLDNNActivationFunc<T, mkldnn::algorithm::eltwise_sqrt>;
+    MKLDNNActivationFunc<T, dnnl::algorithm::eltwise_sqrt>;
 template <typename T>
-using AbsMKLDNNFunctor =
-    MKLDNNActivationFunc<T, mkldnn::algorithm::eltwise_abs>;
+using AbsMKLDNNFunctor = MKLDNNActivationFunc<T, dnnl::algorithm::eltwise_abs>;
 template <typename T>
-using EluMKLDNNFunctor =
-    MKLDNNActivationFunc<T, mkldnn::algorithm::eltwise_elu>;
+using EluMKLDNNFunctor = MKLDNNActivationFunc<T, dnnl::algorithm::eltwise_elu>;
 template <typename T>
 using ReluMKLDNNGradFunctor =
-    MKLDNNActivationGradFunc<T, mkldnn::algorithm::eltwise_relu>;
+    MKLDNNActivationGradFunc<T, dnnl::algorithm::eltwise_relu>;
 template <typename T>
 using Relu6MKLDNNGradFunctor =
-    MKLDNNActivationGradFunc<T, mkldnn::algorithm::eltwise_bounded_relu>;
+    MKLDNNActivationGradFunc<T, dnnl::algorithm::eltwise_bounded_relu>;
 template <typename T>
 using SwishMKLDNNGradFunctor =
-    MKLDNNActivationGradFunc<T, mkldnn::algorithm::eltwise_swish>;
+    MKLDNNActivationGradFunc<T, dnnl::algorithm::eltwise_swish>;
 template <typename T>
 using HardSwishMKLDNNGradFunctor =
-    MKLDNNActivationGradFunc<T, mkldnn::algorithm::eltwise_hardswish>;
+    MKLDNNActivationGradFunc<T, dnnl::algorithm::eltwise_hardswish>;
 template <typename T>
 using SigmoidMKLDNNGradFunctor =
-    MKLDNNActivationGradFunc<T, mkldnn::algorithm::eltwise_logistic>;
+    MKLDNNActivationGradFunc<T, dnnl::algorithm::eltwise_logistic>;
 template <typename T>
 using TanhMKLDNNGradFunctor =
-    MKLDNNActivationGradFunc<T, mkldnn::algorithm::eltwise_tanh>;
+    MKLDNNActivationGradFunc<T, dnnl::algorithm::eltwise_tanh>;
 template <typename T>
 using SqrtMKLDNNGradFunctor =
-    MKLDNNActivationGradFunc<T, mkldnn::algorithm::eltwise_sqrt>;
+    MKLDNNActivationGradFunc<T, dnnl::algorithm::eltwise_sqrt>;
 template <typename T>
 using AbsMKLDNNGradFunctor =
-    MKLDNNActivationGradFunc<T, mkldnn::algorithm::eltwise_abs>;
+    MKLDNNActivationGradFunc<T, dnnl::algorithm::eltwise_abs>;
 template <typename T>
 using EluMKLDNNGradFunctor =
-    MKLDNNActivationGradFunc<T, mkldnn::algorithm::eltwise_elu>;
+    MKLDNNActivationGradFunc<T, dnnl::algorithm::eltwise_elu>;
 }  // namespace operators
 }  // namespace paddle
...
@@ -27,24 +27,23 @@ class MKLDNNDeviceContext;
 namespace paddle {
 namespace operators {
-using mkldnn::memory;
-using mkldnn::primitive;
-using mkldnn::reorder;
-using mkldnn::stream;
+using dnnl::memory;
+using dnnl::primitive;
+using dnnl::reorder;
+using dnnl::stream;
 using paddle::platform::MKLDNNDeviceContext;
 using platform::to_void_cast;
 template <typename T>
 class BatchNormMKLDNNHandler : public platform::MKLDNNHandlerNoCachingT<
-                                   T, mkldnn::batch_normalization_forward,
-                                   mkldnn::batch_normalization_backward> {
+                                   T, dnnl::batch_normalization_forward,
+                                   dnnl::batch_normalization_backward> {
  public:
   BatchNormMKLDNNHandler(const paddle::framework::ExecutionContext &ctx,
-                         const mkldnn::engine mkldnn_engine, const Tensor *x,
+                         const dnnl::engine mkldnn_engine, const Tensor *x,
                          const bool global_stats, const bool test_mode)
-      : platform::MKLDNNHandlerNoCachingT<T,
-                                          mkldnn::batch_normalization_forward,
-                                          mkldnn::batch_normalization_backward>(
+      : platform::MKLDNNHandlerNoCachingT<T, dnnl::batch_normalization_forward,
+                                          dnnl::batch_normalization_backward>(
            mkldnn_engine, ctx.GetPlace()) {
     const float epsilon = ctx.Attr<float>("epsilon");
     const bool fuse_with_relu = ctx.HasAttr("fuse_with_relu")
@@ -66,28 +65,27 @@ class BatchNormMKLDNNHandler : public platform::MKLDNNHandlerNoCachingT<
     auto src_tz = paddle::framework::vectorize(x->dims());
     // Flags are added by bitwise OR operation
-    auto flags = mkldnn::normalization_flags::use_scale_shift;  // 001
+    auto flags = dnnl::normalization_flags::use_scale_shift;  // 001
     if (global_stats)
-      flags |= mkldnn::normalization_flags::use_global_stats;  // 010
+      flags |= dnnl::normalization_flags::use_global_stats;  // 010
     if (fuse_with_relu && test_mode)
-      flags |= mkldnn::normalization_flags::fuse_norm_relu;  // 100
+      flags |= dnnl::normalization_flags::fuse_norm_relu;  // 100
-    auto md = mkldnn::memory::desc(
+    auto md = dnnl::memory::desc(
         src_tz, platform::MKLDNNGetDataType<T>(),
         platform::MKLDNNFormatForSize(src_tz.size(), x->format()));
     this->AcquireForwardPrimitiveDescriptor(
-        global_stats == true ? mkldnn::prop_kind::forward_scoring
-                             : mkldnn::prop_kind::forward_training,
+        global_stats == true ? dnnl::prop_kind::forward_scoring
+                             : dnnl::prop_kind::forward_training,
        md, epsilon, flags);
   }
   BatchNormMKLDNNHandler(const paddle::framework::ExecutionContext &ctx,
-                         const mkldnn::engine mkldnn_engine, const Tensor *in_x,
+                         const dnnl::engine mkldnn_engine, const Tensor *in_x,
                          const Tensor *scale, const Tensor *out_grad)
-      : platform::MKLDNNHandlerNoCachingT<T,
-                                          mkldnn::batch_normalization_forward,
-                                          mkldnn::batch_normalization_backward>(
+      : platform::MKLDNNHandlerNoCachingT<T, dnnl::batch_normalization_forward,
+                                          dnnl::batch_normalization_backward>(
            mkldnn_engine, ctx.GetPlace()) {
     PADDLE_ENFORCE_EQ(out_grad->layout(), DataLayout::kMKLDNN,
                       platform::errors::InvalidArgument(
@@ -112,21 +110,21 @@ class BatchNormMKLDNNHandler : public platform::MKLDNNHandlerNoCachingT<
     auto dims = framework::vectorize(in_x->dims());
     auto diff_dst_md =
-        mkldnn::memory::desc(dims, platform::MKLDNNGetDataType<T>(), diff_fmt);
+        dnnl::memory::desc(dims, platform::MKLDNNGetDataType<T>(), diff_fmt);
     auto src_md =
-        mkldnn::memory::desc(dims, platform::MKLDNNGetDataType<T>(), src_fmt);
+        dnnl::memory::desc(dims, platform::MKLDNNGetDataType<T>(), src_fmt);
     const float epsilon = ctx.Attr<float>("epsilon");
     this->AcquireForwardPrimitiveDescriptor(
-        mkldnn::prop_kind::forward_training, src_md, epsilon,
-        mkldnn::normalization_flags::use_scale_shift);
+        dnnl::prop_kind::forward_training, src_md, epsilon,
+        dnnl::normalization_flags::use_scale_shift);
     this->AcquireBackwardPrimitiveDescriptor(
-        mkldnn::prop_kind::backward, diff_dst_md, src_md, epsilon,
-        mkldnn::normalization_flags::use_scale_shift);
+        dnnl::prop_kind::backward, diff_dst_md, src_md, epsilon,
+        dnnl::normalization_flags::use_scale_shift);
   }
-  std::shared_ptr<mkldnn::memory> AcquireScaleShiftMemory(const Tensor *scale,
+  std::shared_ptr<dnnl::memory> AcquireScaleShiftMemory(const Tensor *scale,
                                                           const Tensor *shift) {
     auto scale_tz = paddle::framework::vectorize(scale->dims());
     const unsigned int C = scale_tz[0];
@@ -147,34 +145,34 @@ class BatchNormMKLDNNHandler : public platform::MKLDNNHandlerNoCachingT<
     return scaleshift_memory;
   }
-  std::shared_ptr<mkldnn::memory> AcquireDiffScaleShiftMemory(
+  std::shared_ptr<dnnl::memory> AcquireDiffScaleShiftMemory(
       T *diff_scaleshift_data) {
     return this->AcquireMemoryFromPrimitive(this->bwd_pd_->diff_weights_desc(),
                                             diff_scaleshift_data);
   }
-  std::shared_ptr<mkldnn::memory> AcquireMeanMemory(
+  std::shared_ptr<dnnl::memory> AcquireMeanMemory(
      const framework::Tensor *mean) {
     const T *mean_data = mean->data<T>();
     return this->AcquireMemoryFromPrimitive(this->fwd_pd_->mean_desc(),
                                             to_void_cast<T>(mean_data));
   }
-  std::shared_ptr<mkldnn::memory> AcquireMeanMemory(framework::Tensor *mean) {
+  std::shared_ptr<dnnl::memory> AcquireMeanMemory(framework::Tensor *mean) {
     T *mean_data = mean->mutable_data<T>(this->place_,
                                          this->fwd_pd_->mean_desc().get_size());
     return this->AcquireMemoryFromPrimitive(this->fwd_pd_->mean_desc(),
                                             mean_data);
   }
-  std::shared_ptr<mkldnn::memory> AcquireVarianceMemory(
+  std::shared_ptr<dnnl::memory> AcquireVarianceMemory(
      const framework::Tensor *variance) {
     const T *variance_data = variance->data<T>();
     return this->AcquireMemoryFromPrimitive(this->fwd_pd_->variance_desc(),
                                             to_void_cast<T>(variance_data));
   }
-  std::shared_ptr<mkldnn::memory> AcquireVarianceMemory(
+  std::shared_ptr<dnnl::memory> AcquireVarianceMemory(
      framework::Tensor *variance) {
     T *variance_data = variance->mutable_data<T>(
         this->place_, this->fwd_pd_->variance_desc().get_size());
@@ -233,12 +231,11 @@ class BatchNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     y->set_format(platform::GetMKLDNNFormat(*dst_memory));
     auto &astream = platform::MKLDNNDeviceContext::tls().get_stream();
-    batch_norm_p->execute(astream,
-                          {{MKLDNN_ARG_SRC, *src_memory},
-                           {MKLDNN_ARG_SCALE_SHIFT, *scaleshift_memory},
-                           {MKLDNN_ARG_MEAN, *mean_memory},
-                           {MKLDNN_ARG_VARIANCE, *variance_memory},
-                           {MKLDNN_ARG_DST, *dst_memory}});
+    batch_norm_p->execute(astream, {{DNNL_ARG_SRC, *src_memory},
+                                    {DNNL_ARG_SCALE_SHIFT, *scaleshift_memory},
+                                    {DNNL_ARG_MEAN, *mean_memory},
+                                    {DNNL_ARG_VARIANCE, *variance_memory},
+                                    {DNNL_ARG_DST, *dst_memory}});
     astream.wait();
     if (!global_stats) {
@@ -307,13 +304,13 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
     auto &astream = platform::MKLDNNDeviceContext::tls().get_stream();
     batch_norm_bwd_p->execute(
-        astream, {{MKLDNN_ARG_SRC, *src_memory},
-                  {MKLDNN_ARG_MEAN, *mean_memory},
-                  {MKLDNN_ARG_VARIANCE, *variance_memory},
-                  {MKLDNN_ARG_DIFF_DST, *diff_dst_memory},
-                  {MKLDNN_ARG_SCALE_SHIFT, *scaleshift_memory},
-                  {MKLDNN_ARG_DIFF_SRC, *diff_src_memory},
-                  {MKLDNN_ARG_DIFF_SCALE_SHIFT, *diff_scaleshift_memory}});
+        astream, {{DNNL_ARG_SRC, *src_memory},
+                  {DNNL_ARG_MEAN, *mean_memory},
+                  {DNNL_ARG_VARIANCE, *variance_memory},
+                  {DNNL_ARG_DIFF_DST, *diff_dst_memory},
+                  {DNNL_ARG_SCALE_SHIFT, *scaleshift_memory},
+                  {DNNL_ARG_DIFF_SRC, *diff_src_memory},
+                  {DNNL_ARG_DIFF_SCALE_SHIFT, *diff_scaleshift_memory}});
     astream.wait();
     T *diff_scale_data = diff_scale->mutable_data<T>(ctx.GetPlace());
...
@@ -34,7 +34,7 @@ class ClipMKLDNNKernel : public paddle::framework::OpKernel<T> {
     auto* out = ctx.Output<Tensor>("Out");
     paddle::platform::ActivationMKLDNNHandler<T> handler(
-        mkldnn::algorithm::eltwise_clip_v2, ctx, mkldnn_engine, ctx.GetPlace(),
+        dnnl::algorithm::eltwise_clip_v2, ctx, mkldnn_engine, ctx.GetPlace(),
         x);
     auto src_memory_p = handler.AcquireSrcMemory(x);
@@ -42,8 +42,8 @@ class ClipMKLDNNKernel : public paddle::framework::OpKernel<T> {
     auto activation_p = handler.AcquireForwardPrimitive();
     auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
-    activation_p->execute(astream, {{MKLDNN_ARG_FROM, *src_memory_p},
-                                    {MKLDNN_ARG_TO, *dst_memory_p}});
+    activation_p->execute(astream, {{DNNL_ARG_FROM, *src_memory_p},
+                                    {DNNL_ARG_TO, *dst_memory_p}});
     astream.wait();
     out->set_layout(paddle::framework::DataLayout::kMKLDNN);
@@ -68,8 +68,8 @@ class ClipGradMKLDNNKernel : public paddle::framework::OpKernel<T> {
     auto* dout = ctx.Input<Tensor>(paddle::framework::GradVarName("Out"));
     paddle::platform::ActivationMKLDNNHandler<T> handler(
-        mkldnn::algorithm::eltwise_clip_v2, ctx, mkldnn_engine, ctx.GetPlace(),
-        x, dout);
+        dnnl::algorithm::eltwise_clip_v2, ctx, mkldnn_engine, ctx.GetPlace(), x,
+        dout);
     auto src_memory_p = handler.AcquireBackwardSrcMemory(x);
     auto diff_dst_memory_p = handler.AcquireDiffDstMemory(dout);
@@ -78,9 +78,9 @@ class ClipGradMKLDNNKernel : public paddle::framework::OpKernel<T> {
     auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
     activation_backward_p->execute(astream,
-                                   {{MKLDNN_ARG_SRC, *src_memory_p},
-                                    {MKLDNN_ARG_DIFF_DST, *diff_dst_memory_p},
-                                    {MKLDNN_ARG_DIFF_SRC, *diff_src_memory_p}});
+                                   {{DNNL_ARG_SRC, *src_memory_p},
+                                    {DNNL_ARG_DIFF_DST, *diff_dst_memory_p},
+                                    {DNNL_ARG_DIFF_SRC, *diff_src_memory_p}});
     astream.wait();
     dx->set_layout(paddle::framework::DataLayout::kMKLDNN);
...
@@ -24,10 +24,10 @@ namespace operators {
 using framework::DataLayout;
 using framework::Tensor;
 using framework::LoDTensor;
-using mkldnn::memory;
-using mkldnn::primitive;
-using mkldnn::concat;
-using mkldnn::stream;
+using dnnl::memory;
+using dnnl::primitive;
+using dnnl::concat;
+using dnnl::stream;
 using platform::to_void_cast;
 template <typename T>
@@ -35,7 +35,7 @@ class ConcatMKLDNNHandler
     : public platform::MKLDNNHandlerNoCachingT<T, dnnl::concat> {
  public:
   ConcatMKLDNNHandler(const framework::ExecutionContext& ctx,
-                      const mkldnn::engine mkldnn_engine,
+                      const dnnl::engine mkldnn_engine,
                       const std::vector<const Tensor*>& inputs, Tensor* output)
       : platform::MKLDNNHandlerNoCachingT<T, dnnl::concat>(mkldnn_engine,
                                                            ctx.GetPlace()) {
@@ -86,7 +86,7 @@ class ConcatMKLDNNHandler
         dst_md, concat_axis, srcs_md, this->engine_));
   }
-  std::shared_ptr<mkldnn::memory> AcquireSrcMemory(const Tensor& input, int i) {
+  std::shared_ptr<dnnl::memory> AcquireSrcMemory(const Tensor& input, int i) {
     const T* input_data = input.data<T>();
     return this->AcquireMemoryFromPrimitive(this->fwd_pd_->src_desc(i),
                                             to_void_cast<T>(input_data));
@@ -139,9 +139,9 @@ class ConcatMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     std::unordered_map<int, memory> args;
     for (size_t i = 0; i < multi_input.size(); ++i) {
       srcs.push_back(handler.AcquireSrcMemory(*(multi_input[i]), i));
-      args.insert({MKLDNN_ARG_MULTIPLE_SRC + i, *(srcs.at(i))});
+      args.insert({DNNL_ARG_MULTIPLE_SRC + i, *(srcs.at(i))});
     }
-    args.insert({MKLDNN_ARG_DST, *dst_mem});
+    args.insert({DNNL_ARG_DST, *dst_mem});
     concat_p->execute(astream, args);
     astream.wait();
@@ -185,7 +185,7 @@ class ConcatGradMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     std::vector<int64_t> offset(dout_vec_dims.size(), 0);
-    mkldnn::memory::data_type dout_type =
+    dnnl::memory::data_type dout_type =
         framework::ToMKLDNNDataType(dout->type());
     platform::ReorderMKLDNNHandler reorder_handler(dout_vec_dims, dout->type(),
                                                    dout_type, onednn_engine);
...
@@ -33,18 +33,18 @@ inline MKLDNNMemoryFormat GetWeightsFormat(const MKLDNNMemoryFormat format,
   }
 }
-static mkldnn::memory::data_type GetDstType(bool is_int8, bool is_bfloat16,
+static dnnl::memory::data_type GetDstType(bool is_int8, bool is_bfloat16,
                                            bool force_fp32_output,
                                            std::string fuse_activation,
                                            bool fuse_residual_conn,
                                            const Tensor* residual_param) {
-  auto dst_dt = mkldnn::memory::data_type::f32;
+  auto dst_dt = dnnl::memory::data_type::f32;
   if (is_int8) {
     dst_dt = (fuse_activation == "relu" || fuse_activation == "relu6")
-                 ? mkldnn::memory::data_type::u8
-                 : mkldnn::memory::data_type::s8;
+                 ? dnnl::memory::data_type::u8
+                 : dnnl::memory::data_type::s8;
     if (force_fp32_output) {
-      dst_dt = mkldnn::memory::data_type::f32;
+      dst_dt = dnnl::memory::data_type::f32;
     }
     if (fuse_residual_conn && residual_param) {
       auto residual_dt = framework::ToMKLDNNDataType(residual_param->type());
@@ -52,7 +52,7 @@ static mkldnn::memory::data_type GetDstType(bool is_int8, bool is_bfloat16,
     }
   } else {
     if (!force_fp32_output && is_bfloat16) {
-      dst_dt = mkldnn::memory::data_type::bf16;
+      dst_dt = dnnl::memory::data_type::bf16;
       if (fuse_residual_conn && residual_param) {
         dst_dt = framework::ToMKLDNNDataType(residual_param->type());
       }
@@ -63,19 +63,19 @@ static mkldnn::memory::data_type GetDstType(bool is_int8, bool is_bfloat16,
 template <typename T, typename K, typename T_out>
 class ConvMKLDNNHandlerT
-    : public platform::MKLDNNHandlerT<T, mkldnn::convolution_forward,
-                                      mkldnn::convolution_backward_data,
-                                      mkldnn::convolution_backward_weights> {
+    : public platform::MKLDNNHandlerT<T, dnnl::convolution_forward,
+                                      dnnl::convolution_backward_data,
+                                      dnnl::convolution_backward_weights> {
  public:
   ConvMKLDNNHandlerT(const framework::ExecutionContext& ctx,
                      const platform::MKLDNNDeviceContext& dev_ctx,
-                     const mkldnn::engine mkldnn_engine,
+                     const dnnl::engine mkldnn_engine,
                      platform::Place cpu_place, const Tensor* input,
                      const Tensor* filter, const Tensor* bias, Tensor* output,
                      const std::string& unique_name)
-      : platform::MKLDNNHandlerT<T, mkldnn::convolution_forward,
-                                 mkldnn::convolution_backward_data,
-                                 mkldnn::convolution_backward_weights>(
+      : platform::MKLDNNHandlerT<T, dnnl::convolution_forward,
+                                 dnnl::convolution_backward_data,
+                                 dnnl::convolution_backward_weights>(
            dev_ctx, mkldnn_engine, cpu_place,
            platform::CreateKey(dev_ctx, framework::vectorize(input->dims()),
                                unique_name)) {
@@ -184,27 +184,27 @@ class ConvMKLDNNHandlerT
     const auto dst_tz = framework::vectorize(output->dims());
-    const mkldnn::memory::dims stride_dims = strides;
+    const dnnl::memory::dims stride_dims = strides;
     const auto mkldnn_paddings = platform::ToMkldnnPadding(paddings);
-    const mkldnn::memory::dims dilations_dims = dilations;
+    const dnnl::memory::dims dilations_dims = dilations;
     /* create memory descriptor for convolution without specified format
      * ('any') which lets a primitive (convolution in this case) choose
      * the memory format preferred for best performance
      */
     auto chosen_memory_format = MKLDNNMemoryFormat::any;
-    auto data_type = mkldnn::memory::data_type::f32;
+    auto data_type = dnnl::memory::data_type::f32;
     if (ctx.Attr<std::string>("mkldnn_data_type") == "bfloat16" ||
         std::is_same<T_out, platform::bfloat16>::value)
-      data_type = mkldnn::memory::data_type::bf16;
+      data_type = dnnl::memory::data_type::bf16;
-    mkldnn::memory::desc src_md, weights_md;
+    dnnl::memory::desc src_md, weights_md;
     if (platform::is_int8<T>()) {
       src_md = platform::MKLDNNMemDesc(
           src_tz, framework::ToMKLDNNDataType(input->type()),
           chosen_memory_format);
       weights_md = platform::MKLDNNMemDesc(
-          weights_tz, mkldnn::memory::data_type::s8, chosen_memory_format);
+          weights_tz, dnnl::memory::data_type::s8, chosen_memory_format);
     } else {
       src_md =
           platform::MKLDNNMemDesc(src_tz, data_type, chosen_memory_format);
@@ -214,24 +214,24 @@ class ConvMKLDNNHandlerT
     const auto dst_md = platform::MKLDNNMemDesc(
         dst_tz, platform::MKLDNNGetDataType<T_out>(), chosen_memory_format);
-    const auto fwd_prop_kind = is_test ? mkldnn::prop_kind::forward_inference
-                                       : mkldnn::prop_kind::forward_training;
+    const auto fwd_prop_kind = is_test ? dnnl::prop_kind::forward_inference
+                                       : dnnl::prop_kind::forward_training;
     float sum_scale = 1.0f;
     std::vector<float> output_shift_scale;
     if (platform::is_int8<T>())
       std::tie(sum_scale, output_shift_scale) = get_int8_scales(ctx);
-    const mkldnn::primitive_attr conv_attr = CreatePostOps(
+    const dnnl::primitive_attr conv_attr = CreatePostOps(
         fuse_activation, fuse_alpha, fuse_beta, fuse_residual_conn,
         output_shift_scale, sum_scale);  // for INT8 only!
     if (bias) {
       auto bias_tz = framework::vectorize(bias->dims());
-      mkldnn::memory::desc bias_md;
+      dnnl::memory::desc bias_md;
       if (platform::is_int8<T>()) {
         bias_md = platform::MKLDNNMemDesc(
-            bias_tz, mkldnn::memory::data_type::s32, MKLDNNMemoryFormat::x);
+            bias_tz, dnnl::memory::data_type::s32, MKLDNNMemoryFormat::x);
       } else {
         bias_md = platform::MKLDNNMemDesc(bias_tz, data_type,
                                           MKLDNNMemoryFormat::x);
@@ -256,9 +256,9 @@ class ConvMKLDNNHandlerT
                      const Tensor* filter, const Tensor* bias,
                      const Tensor* out_grad, Tensor* filter_grad,
                      Tensor* in_x_grad, const std::string& unique_name)
-      : platform::MKLDNNHandlerT<T, mkldnn::convolution_forward,
-                                 mkldnn::convolution_backward_data,
-                                 mkldnn::convolution_backward_weights>(
+      : platform::MKLDNNHandlerT<T, dnnl::convolution_forward,
+                                 dnnl::convolution_backward_data,
+                                 dnnl::convolution_backward_weights>(
            dev_ctx, dev_ctx.GetEngine(), cpu_place,
            platform::CreateKey(dev_ctx, framework::vectorize(in->dims()),
                                unique_name)) {
@@ -348,42 +348,42 @@ class ConvMKLDNNHandlerT
     auto mkldnn_paddings = platform::ToMkldnnPadding(paddings);
     std::transform(dilations.begin(), dilations.end(), dilations.begin(),
                    [](int64_t i) { return i - 1; });
-    const mkldnn::memory::dims dilations_dims = dilations;
-    const mkldnn::memory::dims stride_dims = strides;
+    const dnnl::memory::dims dilations_dims = dilations;
+    const dnnl::memory::dims stride_dims = strides;
     // Recreating FWD PD. For training there are no post ops in convolution
-    mkldnn::primitive_attr conv_attr;
+    dnnl::primitive_attr conv_attr;
     if (bias) {
       auto bias_tz = framework::vectorize(bias->dims());
-      mkldnn::memory::desc bias_md;
+      dnnl::memory::desc bias_md;
       if (platform::is_int8<T>()) {
         bias_md = platform::MKLDNNMemDesc(
-            bias_tz, mkldnn::memory::data_type::s32, MKLDNNMemoryFormat::x);
+            bias_tz, dnnl::memory::data_type::s32, MKLDNNMemoryFormat::x);
       } else {
         bias_md = platform::MKLDNNMemDesc(
-            bias_tz, mkldnn::memory::data_type::f32, MKLDNNMemoryFormat::x);
+            bias_tz, dnnl::memory::data_type::f32, MKLDNNMemoryFormat::x);
       }
       this->AcquireForwardPrimitiveDescriptor(
-          conv_attr, mkldnn::prop_kind::forward_training,
+          conv_attr, dnnl::prop_kind::forward_training,
           dnnl::algorithm::convolution_direct, src_md, weights_md, bias_md,
           dst_md, stride_dims, dilations_dims, mkldnn_paddings[0],
           mkldnn_paddings[1]);
     } else {
       this->AcquireForwardPrimitiveDescriptor(
-          conv_attr, mkldnn::prop_kind::forward_training,
+          conv_attr, dnnl::prop_kind::forward_training,
           dnnl::algorithm::convolution_direct, src_md, weights_md, dst_md,
           stride_dims, dilations_dims, mkldnn_paddings[0],
          mkldnn_paddings[1]);
     }
     this->AcquireBackwardPrimitiveDescriptor(
-        mkldnn::algorithm::convolution_direct, diff_src_md, weights_md,
+        dnnl::algorithm::convolution_direct, diff_src_md, weights_md,
        diff_dst_md, strides, dilations_dims, mkldnn_paddings[0],
        mkldnn_paddings[1]);
     this->AcquireBackwardWeightsPrimitiveDescriptor(
-        mkldnn::algorithm::convolution_direct, src_md, diff_weights_md,
+        dnnl::algorithm::convolution_direct, src_md, diff_weights_md,
        diff_dst_md, strides, dilations_dims, mkldnn_paddings[0],
        mkldnn_paddings[1]);
   }
@@ -471,12 +471,12 @@ class ConvMKLDNNHandlerT
     return std::make_tuple(sum_scale, output_shift_scale);
   }
-  mkldnn::primitive_attr CreatePostOps(
+  dnnl::primitive_attr CreatePostOps(
      std::string fuse_activation, float fuse_alpha, float fuse_beta,
      bool fuse_residual_conn, const std::vector<float> output_shift_scale = {},
      float sum_scale = 1.0f) {
-    mkldnn::primitive_attr conv_attr;
-    mkldnn::post_ops post_operations;
+    dnnl::primitive_attr conv_attr;
+    dnnl::post_ops post_operations;
     if (output_shift_scale.size() > 0) {
       int mask = output_shift_scale.size() > 1 ? 1 << 1 : 0;
       conv_attr.set_output_scales(mask, output_shift_scale);
@@ -494,29 +494,28 @@ class ConvMKLDNNHandlerT
     // PostOps object and configure it to execute an eltwise relu operation.
     constexpr float scale = 1.0f;
     if (fuse_activation == "relu" || fuse_activation == "leaky_relu") {
-      post_operations.append_eltwise(scale, mkldnn::algorithm::eltwise_relu,
+      post_operations.append_eltwise(scale, dnnl::algorithm::eltwise_relu,
                                      fuse_alpha, fuse_beta);
     } else if (fuse_activation == "relu6") {
-      post_operations.append_eltwise(scale,
-                                     mkldnn::algorithm::eltwise_bounded_relu,
-                                     fuse_alpha, fuse_beta);
+      post_operations.append_eltwise(
+          scale, dnnl::algorithm::eltwise_bounded_relu, fuse_alpha, fuse_beta);
     } else if (fuse_activation == "swish") {
-      post_operations.append_eltwise(scale, mkldnn::algorithm::eltwise_swish,
+      post_operations.append_eltwise(scale, dnnl::algorithm::eltwise_swish,
                                      fuse_alpha, fuse_beta);
     } else if (fuse_activation == "hard_swish") {
-      post_operations.append_eltwise(
-          scale, mkldnn::algorithm::eltwise_hardswish, fuse_alpha, fuse_beta);
+      post_operations.append_eltwise(scale, dnnl::algorithm::eltwise_hardswish,
+                                     fuse_alpha, fuse_beta);
     } else if (fuse_activation == "hard_sigmoid") {
-      post_operations.append_eltwise(scale, mkldnn::algorithm::eltwise_linear,
+      post_operations.append_eltwise(scale, dnnl::algorithm::eltwise_linear,
                                      fuse_alpha, fuse_beta);
-      post_operations.append_eltwise(scale, mkldnn::algorithm::eltwise_clip,
-                                     0.0f, 1.0f);
+      post_operations.append_eltwise(scale, dnnl::algorithm::eltwise_clip, 0.0f,
+                                     1.0f);
     }
     conv_attr.set_post_ops(post_operations);
     return conv_attr;
   }
-  std::shared_ptr<mkldnn::memory>
+  std::shared_ptr<dnnl::memory>
   AcquireWeightsMemoryWithReorderFromDataPrimitive(
      const framework::Tensor* filter, const int groups, const bool is_conv3d) {
     const K* filter_data = filter->data<K>();
@@ -532,22 +531,21 @@ class ConvMKLDNNHandlerT
         platform::to_void_cast<K>(filter_data), "@weights_mem_d_p", false);
   }
-  std::shared_ptr<mkldnn::memory> AcquireSrcMemoryWithReorder(
+  std::shared_ptr<dnnl::memory> AcquireSrcMemoryWithReorder(
      const framework::Tensor* input) {
     return this->AcquireMemoryWithReorderPrimitive(
         input, "@src_mem_p_user", "@src_mem_p_target", "@src_mem_p",
         this->fwd_pd_->src_desc());
   }
-  std::shared_ptr<mkldnn::memory>
-  AcquireSrcMemoryWithReorderFromWeightsPrimitive(
+  std::shared_ptr<dnnl::memory> AcquireSrcMemoryWithReorderFromWeightsPrimitive(
      const framework::Tensor* input) {
     return this->AcquireMemoryWithReorderPrimitive(
         input, "@src_mem_w_p_user", "@src_mem_w_p_target", "@src_mem_w_p",
         this->bwd_w_pd_->src_desc());
   }
-  std::shared_ptr<mkldnn::memory>
+  std::shared_ptr<dnnl::memory>
   AcquireDiffDstMemoryWithReorderFromWeightsPrimitive(
      const framework::Tensor* out_grad) {
     return this->AcquireMemoryWithReorderPrimitive(
@@ -555,7 +553,7 @@ class ConvMKLDNNHandlerT
         "@diff_dst_mem_w_p", this->bwd_w_pd_->diff_dst_desc());
   }
-  std::shared_ptr<mkldnn::memory>
+  std::shared_ptr<dnnl::memory>
   AcquireDiffDstMemoryWithReorderMemoryFromDataPrimitive(
      const framework::Tensor* out_grad) {
     return this->AcquireMemoryWithReorderPrimitive(
@@ -563,10 +561,10 @@ class ConvMKLDNNHandlerT
         "@diff_dst_mem_p", this->bwd_pd_->diff_dst_desc());
   }
-  std::shared_ptr<mkldnn::memory> AcquireMemoryWithReorderPrimitive(
+  std::shared_ptr<dnnl::memory> AcquireMemoryWithReorderPrimitive(
      const framework::Tensor* in_mem, const char* key_mem_user,
      const char* key_mem_target, const char* key_mem,
-      const mkldnn::memory::desc& mem_md) {
+      const dnnl::memory::desc& mem_md) {
     const T* in_mem_data = in_mem->data<T>();
     const std::string user_key_suffix{key_mem_user};
     auto user_mem_p = this->AcquireMemory(user_key_suffix);
@@ -588,7 +586,7 @@ class ConvMKLDNNHandlerT
     }
   }
-  std::shared_ptr<mkldnn::memory> AcquireWeightsMemoryWithReorder(
+  std::shared_ptr<dnnl::memory> AcquireWeightsMemoryWithReorder(
      const framework::Tensor* filter, const int groups, const bool is_conv3d,
      const bool is_test, const std::vector<float>& scale_data = {1.0f},
      int mask = 0) {
@@ -613,7 +611,7 @@ class ConvMKLDNNHandlerT
     }
   }
-  std::shared_ptr<mkldnn::memory> AcquireBiasMemoryWithReorder(
+  std::shared_ptr<dnnl::memory> AcquireBiasMemoryWithReorder(
      const framework::Tensor* bias, const bool is_test,
      const std::vector<float>& scale_data = {1.0f}, int mask = 0) {
     auto bias_mem_p = this->AcquireMemory("@bias_mem_p_target");
@@ -632,7 +630,7 @@ class ConvMKLDNNHandlerT
     }
   }
-  std::shared_ptr<mkldnn::memory> AcquireResidualMemory(
+  std::shared_ptr<dnnl::memory> AcquireResidualMemory(
      const framework::Tensor* residual_param) {
     void* residual_data =
         residual_param->type() == framework::DataTypeTrait<T_out>::DataType()
@@ -653,7 +651,7 @@ class ConvMKLDNNHandlerT
     }
   }
-  std::shared_ptr<mkldnn::memory> AcquireDstMemoryWithResidual(
+  std::shared_ptr<dnnl::memory> AcquireDstMemoryWithResidual(
      framework::Tensor* output, const framework::Tensor* residual_param) {
     std::shared_ptr<dnnl::memory> dst_memory_p;
     if (residual_param->format() !=
@@ -692,17 +690,17 @@ class ConvMKLDNNOpKernel : public framework::OpKernel<T> {
         GetDstType(is_INT8, is_BFLOAT16, force_fp32_output, fuse_activation,
                    fuse_residual_conn, residual_param);
     if (!is_INT8) {
-      if (dst_dt == mkldnn::memory::data_type::f32) {
+      if (dst_dt == dnnl::memory::data_type::f32) {
         ComputeFP32<float>(ctx);
-      } else if (dst_dt == mkldnn::memory::data_type::bf16) {
+      } else if (dst_dt == dnnl::memory::data_type::bf16) {
         ComputeFP32<platform::bfloat16>(ctx);
       }
     } else {
-      if (dst_dt == mkldnn::memory::data_type::f32) {
+      if (dst_dt == dnnl::memory::data_type::f32) {
         ComputeINT8<float>(ctx);
-      } else if (dst_dt == mkldnn::memory::data_type::u8) {
+      } else if (dst_dt == dnnl::memory::data_type::u8) {
         ComputeINT8<uint8_t>(ctx);
-      } else if (dst_dt == mkldnn::memory::data_type::s8) {
+      } else if (dst_dt == dnnl::memory::data_type::s8) {
         ComputeINT8<int8_t>(ctx);
       }
     }
@@ -745,13 +743,13 @@ class ConvMKLDNNOpKernel : public framework::OpKernel<T> {
     auto conv_p = handler.AcquireForwardPrimitive();
     std::unordered_map<int, dnnl::memory> args = {
-        {MKLDNN_ARG_SRC, *src_memory_p},
-        {MKLDNN_ARG_WEIGHTS, *weights_memory_p},
-        {MKLDNN_ARG_DST, *dst_memory_p}};
+        {DNNL_ARG_SRC, *src_memory_p},
+        {DNNL_ARG_WEIGHTS, *weights_memory_p},
+        {DNNL_ARG_DST, *dst_memory_p}};
     if (bias) {
       auto bias_memory_p = handler.AcquireBiasMemoryWithReorder(bias, is_test);
-      args.insert({MKLDNN_ARG_BIAS, *bias_memory_p});
+      args.insert({DNNL_ARG_BIAS, *bias_memory_p});
     }
     auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
@@ -821,7 +819,7 @@ class ConvMKLDNNOpKernel : public framework::OpKernel<T> {
         dst_memory_p =
             handler.AcquireDstMemoryWithResidual(output, residual_param);
         need_s8_to_u8 = (platform::MKLDNNGetDataType<T_out>() ==
-                         mkldnn::memory::data_type::s8) &&
+                         dnnl::memory::data_type::s8) &&
                         unsigned_output;
       } else {
         dst_memory_p = handler.template AcquireDstMemory<T_out>(output);
@@ -830,9 +828,9 @@ class ConvMKLDNNOpKernel : public framework::OpKernel<T> {
     auto conv_p = handler.AcquireForwardPrimitive();
     std::unordered_map<int, dnnl::memory> args = {
-        {MKLDNN_ARG_SRC, *src_memory_p},
-        {MKLDNN_ARG_WEIGHTS, *weights_memory_p},
-        {MKLDNN_ARG_DST, *dst_memory_p}};
+        {DNNL_ARG_SRC, *src_memory_p},
+        {DNNL_ARG_WEIGHTS, *weights_memory_p},
+        {DNNL_ARG_DST, *dst_memory_p}};
     if (bias) {
       auto p_scales_tuple = handler.get_int8_bias_scales(ctx);
@@ -840,7 +838,7 @@ class ConvMKLDNNOpKernel : public framework::OpKernel<T> {
       auto bias_memory_p = handler.AcquireBiasMemoryWithReorder(
          bias, is_test, std::get<1>(*p_scales_tuple),
          std::get<0>(*p_scales_tuple));
-      args.insert({MKLDNN_ARG_BIAS, *bias_memory_p});
+      args.insert({DNNL_ARG_BIAS, *bias_memory_p});
     }
     auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
@@ -905,9 +903,9 @@ class ConvMKLDNNGradOpKernel : public framework::OpKernel<T> {
       // TODO(grygielski) why no bias_diff?
       conv_bwd_weights_p->execute(
-          astream, {{MKLDNN_ARG_SRC, *src_memory_p},
-                    {MKLDNN_ARG_DIFF_DST, *diff_dst_memory_p},
-                    {MKLDNN_ARG_DIFF_WEIGHTS, *diff_weights_memory_p}});
+          astream, {{DNNL_ARG_SRC, *src_memory_p},
+                    {DNNL_ARG_DIFF_DST, *diff_dst_memory_p},
+                    {DNNL_ARG_DIFF_WEIGHTS, *diff_weights_memory_p}});
       astream.wait();
       filter_grad->set_layout(framework::DataLayout::kMKLDNN);
@@ -918,16 +916,16 @@ class ConvMKLDNNGradOpKernel : public framework::OpKernel<T> {
       // For convolution with groups convert from blocked to NCHW
      // otherwise there will be problems in next operators working on this data
       if (g > 1) {
-        mkldnn::memory::data_type in_type =
+        dnnl::memory::data_type in_type =
            framework::ToMKLDNNDataType(filter->type());
        // for 3d conv with groups (six dimensional data reorder to goidhw)
        // for 2d conv with groups (five dimensional data reorder to goihw)
        // auto weights_tz = framework::vectorize(filter->dims());
        auto weights_tz = diff_weights_memory_p->get_desc().dims();
-        mkldnn::memory::format_tag out_format =
-            weights_tz.size() == 6 ? mkldnn::memory::format_tag::goidhw
-                                   : mkldnn::memory::format_tag::goihw;
+        dnnl::memory::format_tag out_format =
+            weights_tz.size() == 6 ? dnnl::memory::format_tag::goidhw
+                                   : dnnl::memory::format_tag::goihw;
        platform::ReorderMKLDNNHandler handler(weights_tz, filter->type(),
                                               in_type, mkldnn_engine);
        auto reorder_dst_memory_p =
@@ -947,9 +945,9 @@ class ConvMKLDNNGradOpKernel : public framework::OpKernel<T> {
        // So here we have a data in goihw , which can be interpreted as OIHW
        // (OIDHW for conv3d)
        // because filter_grad shape is set for OIHW (OIDHW for conv3d)
-        mkldnn::memory::format_tag target_format =
-            weights_tz.size() == 6 ? mkldnn::memory::format_tag::oidhw
-                                   : mkldnn::memory::format_tag::oihw;
+        dnnl::memory::format_tag target_format =
+            weights_tz.size() == 6 ? dnnl::memory::format_tag::oidhw
+                                   : dnnl::memory::format_tag::oihw;
        filter_grad->set_format(target_format);
       } else {
        filter_grad->set_format(filter_fmt);
@@ -969,9 +967,9 @@ class ConvMKLDNNGradOpKernel : public framework::OpKernel<T> {
       auto conv_bwd_data_p = handler.AcquireBackwardPrimitive();
       conv_bwd_data_p->execute(astream,
-                               {{MKLDNN_ARG_WEIGHTS, *weights_memory_p},
-                                {MKLDNN_ARG_DIFF_DST, *diff_dst_memory_p},
-                                {MKLDNN_ARG_DIFF_SRC, *diff_src_memory_p}});
+                               {{DNNL_ARG_WEIGHTS, *weights_memory_p},
+                                {DNNL_ARG_DIFF_DST, *diff_dst_memory_p},
+                                {DNNL_ARG_DIFF_SRC, *diff_src_memory_p}});
       astream.wait();
       input_grad->set_layout(framework::DataLayout::kMKLDNN);
...
@@ -25,8 +25,7 @@ namespace operators {
 using Tensor = framework::Tensor;
 using framework::DataLayout;
-inline mkldnn::memory::dims GetWeightsTz(const Tensor* filter,
-                                         const int groups) {
+inline dnnl::memory::dims GetWeightsTz(const Tensor* filter, const int groups) {
   auto iohw_weights_tz = framework::vectorize(filter->dims());
   auto weights_tz = iohw_weights_tz;
@@ -40,14 +39,13 @@ inline mkldnn::memory::dims GetWeightsTz(const Tensor* filter,
 template <typename T, typename K, typename T_out>
 class ConvTransposeMKLDNNHandlerT
-    : public platform::MKLDNNHandlerNoCachingT<T,
-                                               mkldnn::deconvolution_forward> {
+    : public platform::MKLDNNHandlerNoCachingT<T, dnnl::deconvolution_forward> {
  public:
   ConvTransposeMKLDNNHandlerT(const framework::ExecutionContext& ctx,
-                              const mkldnn::engine mkldnn_engine,
+                              const dnnl::engine mkldnn_engine,
                               const Tensor* input, const Tensor* filter,
                               const Tensor* bias, Tensor* output)
-      : platform::MKLDNNHandlerNoCachingT<T, mkldnn::deconvolution_forward>(
+      : platform::MKLDNNHandlerNoCachingT<T, dnnl::deconvolution_forward>(
            mkldnn_engine, ctx.GetPlace()),
        is_test_(ctx.Attr<bool>("is_test")) {
     PADDLE_ENFORCE_EQ(is_test_, true,
@@ -103,13 +101,13 @@ class ConvTransposeMKLDNNHandlerT
     }
     std::vector<int> strides_temp = ctx.Attr<std::vector<int>>("strides");
-    mkldnn::memory::dims strides(begin(strides_temp), end(strides_temp));
+    dnnl::memory::dims strides(begin(strides_temp), end(strides_temp));
     std::vector<int> paddings_temp = ctx.Attr<std::vector<int>>("paddings");
-    mkldnn::memory::dims paddings(begin(paddings_temp), end(paddings_temp));
+    dnnl::memory::dims paddings(begin(paddings_temp), end(paddings_temp));
     std::vector<int> dilations_temp = ctx.Attr<std::vector<int>>("dilations");
-    mkldnn::memory::dims dilations(begin(dilations_temp), end(dilations_temp));
+    dnnl::memory::dims dilations(begin(dilations_temp), end(dilations_temp));
     int groups = ctx.Attr<int>("groups");
     std::string padding_algorithm = ctx.Attr<std::string>("padding_algorithm");
@@ -149,10 +147,10 @@ class ConvTransposeMKLDNNHandlerT
     const float fuse_alpha = ctx.Attr<float>("fuse_alpha");
     const float fuse_beta = ctx.Attr<float>("fuse_beta");
-    auto data_type = mkldnn::memory::data_type::f32;
+    auto data_type = dnnl::memory::data_type::f32;
     if (ctx.Attr<std::string>("mkldnn_data_type") == "bfloat16" ||
         std::is_same<T_out, platform::bfloat16>::value)
-      data_type = mkldnn::memory::data_type::bf16;
+      data_type = dnnl::memory::data_type::bf16;
     const auto src_md =
         platform::MKLDNNMemDesc(src_tz, data_type, chosen_memory_format);
@@ -161,10 +159,10 @@ class ConvTransposeMKLDNNHandlerT
     const auto dst_md = platform::MKLDNNMemDesc(
         dst_tz, platform::MKLDNNGetDataType<T_out>(), chosen_memory_format);
-    const mkldnn::primitive_attr conv_trans_attr =
+    const dnnl::primitive_attr conv_trans_attr =
CreatePostOps(fuse_activation, fuse_alpha, fuse_beta); CreatePostOps(fuse_activation, fuse_alpha, fuse_beta);
auto fwd_prop_kind = is_test_ ? mkldnn::prop_kind::forward_inference auto fwd_prop_kind = is_test_ ? dnnl::prop_kind::forward_inference
: mkldnn::prop_kind::forward_training; : dnnl::prop_kind::forward_training;
if (bias) { if (bias) {
std::vector<int64_t> bias_tz = framework::vectorize(bias->dims()); std::vector<int64_t> bias_tz = framework::vectorize(bias->dims());
const auto bias_md = const auto bias_md =
...@@ -181,44 +179,43 @@ class ConvTransposeMKLDNNHandlerT ...@@ -181,44 +179,43 @@ class ConvTransposeMKLDNNHandlerT
} }
} }
mkldnn::primitive_attr CreatePostOps(const std::string& fuse_activation, dnnl::primitive_attr CreatePostOps(const std::string& fuse_activation,
const float& fuse_alpha, const float& fuse_alpha,
const float& fuse_beta) { const float& fuse_beta) {
mkldnn::primitive_attr conv_attr; dnnl::primitive_attr conv_attr;
mkldnn::post_ops post_operations; dnnl::post_ops post_operations;
// Fusion with ReLU layer is executed through the PostOps feature. Create a // Fusion with ReLU layer is executed through the PostOps feature. Create a
// PostOps object and configure it to execute an eltwise relu operation. // PostOps object and configure it to execute an eltwise relu operation.
if (fuse_activation == "relu" || fuse_activation == "leaky_relu") { if (fuse_activation == "relu" || fuse_activation == "leaky_relu") {
constexpr float scale = 1.0f; constexpr float scale = 1.0f;
post_operations.append_eltwise(scale, mkldnn::algorithm::eltwise_relu, post_operations.append_eltwise(scale, dnnl::algorithm::eltwise_relu,
fuse_alpha, fuse_beta); fuse_alpha, fuse_beta);
} else if (fuse_activation == "relu6") { } else if (fuse_activation == "relu6") {
constexpr float scale = 1.0f; constexpr float scale = 1.0f;
post_operations.append_eltwise(scale, post_operations.append_eltwise(
mkldnn::algorithm::eltwise_bounded_relu, scale, dnnl::algorithm::eltwise_bounded_relu, fuse_alpha, fuse_beta);
fuse_alpha, fuse_beta);
} else if (fuse_activation == "swish") { } else if (fuse_activation == "swish") {
constexpr float scale = 1.0f; constexpr float scale = 1.0f;
post_operations.append_eltwise(scale, mkldnn::algorithm::eltwise_swish, post_operations.append_eltwise(scale, dnnl::algorithm::eltwise_swish,
fuse_alpha, fuse_beta); fuse_alpha, fuse_beta);
} }
conv_attr.set_post_ops(post_operations); conv_attr.set_post_ops(post_operations);
return conv_attr; return conv_attr;
} }
std::shared_ptr<mkldnn::memory> AcquireSrcMemoryWithReorder( std::shared_ptr<dnnl::memory> AcquireSrcMemoryWithReorder(
const framework::Tensor* input) { const framework::Tensor* input) {
const T* input_data = input->data<T>(); const T* input_data = input->data<T>();
auto user_src_md = platform::MKLDNNMemDesc( auto user_src_md = platform::MKLDNNMemDesc(
framework::vectorize(input->dims()), platform::MKLDNNGetDataType<T>(), framework::vectorize(input->dims()), platform::MKLDNNGetDataType<T>(),
input->format()); input->format());
return platform::MKLDNNHandlerNoCachingT<T, mkldnn::deconvolution_forward>:: return platform::MKLDNNHandlerNoCachingT<T, dnnl::deconvolution_forward>::
AcquireMemoryWithReorder(user_src_md, this->fwd_pd_->src_desc(), AcquireMemoryWithReorder(user_src_md, this->fwd_pd_->src_desc(),
platform::to_void_cast<T>(input_data)); platform::to_void_cast<T>(input_data));
} }
std::shared_ptr<mkldnn::memory> AcquireWeightsMemoryWithReorder( std::shared_ptr<dnnl::memory> AcquireWeightsMemoryWithReorder(
const platform::MKLDNNDeviceContext& dev_ctx, const std::string& key, const platform::MKLDNNDeviceContext& dev_ctx, const std::string& key,
const framework::Tensor* filter, const int& groups) { const framework::Tensor* filter, const int& groups) {
const K* filter_data = filter->data<K>(); const K* filter_data = filter->data<K>();
...@@ -236,12 +233,12 @@ class ConvTransposeMKLDNNHandlerT ...@@ -236,12 +233,12 @@ class ConvTransposeMKLDNNHandlerT
} }
template <typename F = T> template <typename F = T>
std::shared_ptr<mkldnn::memory> AcquireMemoryWithReorder( std::shared_ptr<dnnl::memory> AcquireMemoryWithReorder(
const platform::MKLDNNDeviceContext& dev_ctx, const platform::MKLDNNDeviceContext& dev_ctx,
const mkldnn::memory::desc& user_md, const dnnl::memory::desc& user_md, const dnnl::memory::desc& target_md,
const mkldnn::memory::desc& target_md, void* ptr, const std::string& key, void* ptr, const std::string& key, const std::string& suffix,
const std::string& suffix, bool is_persistent = false, bool is_persistent = false, const std::vector<float>& scale_data = {1.0f},
const std::vector<float>& scale_data = {1.0f}, int mask = 0) { int mask = 0) {
const auto target_key = key + suffix + "_target"; const auto target_key = key + suffix + "_target";
const auto key_reorder_p = key + suffix + "reorder_p"; const auto key_reorder_p = key + suffix + "reorder_p";
const auto user_key = key + suffix + "_user"; const auto user_key = key + suffix + "_user";
...@@ -254,7 +251,7 @@ class ConvTransposeMKLDNNHandlerT ...@@ -254,7 +251,7 @@ class ConvTransposeMKLDNNHandlerT
std::make_shared<dnnl::memory>(user_md, this->engine_, ptr); std::make_shared<dnnl::memory>(user_md, this->engine_, ptr);
if (user_md != target_md) { if (user_md != target_md) {
target_memory_p = target_memory_p =
std::make_shared<mkldnn::memory>(target_md, this->engine_); std::make_shared<dnnl::memory>(target_md, this->engine_);
dnnl::reorder::primitive_desc reorder_pdesc; dnnl::reorder::primitive_desc reorder_pdesc;
if (platform::is_int8<T>()) { if (platform::is_int8<T>()) {
dnnl::primitive_attr attr; dnnl::primitive_attr attr;
...@@ -271,8 +268,8 @@ class ConvTransposeMKLDNNHandlerT ...@@ -271,8 +268,8 @@ class ConvTransposeMKLDNNHandlerT
auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
platform::RecordEvent record_reorder("int_reorder", platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp); platform::EventRole::kUniqueOp);
reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p}, reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p},
{MKLDNN_ARG_TO, *target_memory_p}}); {DNNL_ARG_TO, *target_memory_p}});
astream.wait(); astream.wait();
} else { } else {
target_memory_p = user_memory_p; target_memory_p = user_memory_p;
...@@ -288,20 +285,20 @@ class ConvTransposeMKLDNNHandlerT ...@@ -288,20 +285,20 @@ class ConvTransposeMKLDNNHandlerT
// TODO(jczaja): Here we detect if the reorder is cached; if it is, it means it is needed. // TODO(jczaja): Here we detect if the reorder is cached; if it is, it means it is needed.
// We need to change this to get rid of keys. // We need to change this to get rid of keys.
auto reorder_p = std::static_pointer_cast<mkldnn::reorder>( auto reorder_p = std::static_pointer_cast<dnnl::reorder>(
dev_ctx.GetBlob(key_reorder_p)); dev_ctx.GetBlob(key_reorder_p));
if (reorder_p != nullptr) { if (reorder_p != nullptr) {
platform::RecordEvent record_reorder("int_reorder", platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp); platform::EventRole::kUniqueOp);
reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p}, reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p},
{MKLDNN_ARG_TO, *target_memory_p}}); {DNNL_ARG_TO, *target_memory_p}});
astream.wait(); astream.wait();
} }
} }
return target_memory_p; return target_memory_p;
} }
std::shared_ptr<mkldnn::memory> AcquireBiasMemoryWithReorder( std::shared_ptr<dnnl::memory> AcquireBiasMemoryWithReorder(
const platform::MKLDNNDeviceContext& dev_ctx, const std::string& key, const platform::MKLDNNDeviceContext& dev_ctx, const std::string& key,
const framework::Tensor* bias) { const framework::Tensor* bias) {
const K* bias_data = bias->data<K>(); const K* bias_data = bias->data<K>();
...@@ -364,14 +361,14 @@ class ConvTransposeMKLDNNOpKernel : public framework::OpKernel<T> { ...@@ -364,14 +361,14 @@ class ConvTransposeMKLDNNOpKernel : public framework::OpKernel<T> {
auto conv_p = handler.AcquireForwardPrimitive(); auto conv_p = handler.AcquireForwardPrimitive();
std::unordered_map<int, dnnl::memory> args = { std::unordered_map<int, dnnl::memory> args = {
{MKLDNN_ARG_SRC, *src_memory_p}, {DNNL_ARG_SRC, *src_memory_p},
{MKLDNN_ARG_WEIGHTS, *weights_memory_p}, {DNNL_ARG_WEIGHTS, *weights_memory_p},
{MKLDNN_ARG_DST, *dst_memory_p}}; {DNNL_ARG_DST, *dst_memory_p}};
if (bias) { if (bias) {
auto bias_memory_p = auto bias_memory_p =
handler.AcquireBiasMemoryWithReorder(dev_ctx, key, bias); handler.AcquireBiasMemoryWithReorder(dev_ctx, key, bias);
args.insert({MKLDNN_ARG_BIAS, *bias_memory_p}); args.insert({DNNL_ARG_BIAS, *bias_memory_p});
} }
auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
conv_p->execute(astream, args); conv_p->execute(astream, args);
......
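CreatePostOps above fuses the activation by appending an eltwise post-op to a primitive_attr. A minimal sketch of the same idea in isolation, assuming the oneDNN 2.x append_eltwise(scale, algorithm, alpha, beta) signature used elsewhere in this diff; the helper name and values are illustrative only:

#include "dnnl.hpp"

// Illustrative helper (not part of the PR): fuse a ReLU post-op into a
// primitive_attr, the same way CreatePostOps does for "relu".
dnnl::primitive_attr MakeReluPostOpAttr(float fuse_alpha, float fuse_beta) {
  dnnl::post_ops post_operations;
  constexpr float scale = 1.0f;  // output scale of the post-op itself
  post_operations.append_eltwise(scale, dnnl::algorithm::eltwise_relu,
                                 fuse_alpha, fuse_beta);
  dnnl::primitive_attr attr;
  attr.set_post_ops(post_operations);
  return attr;
}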
...@@ -38,16 +38,16 @@ using framework::ExecutionContext; ...@@ -38,16 +38,16 @@ using framework::ExecutionContext;
using platform::MKLDNNDeviceContext; using platform::MKLDNNDeviceContext;
using platform::to_void_cast; using platform::to_void_cast;
using platform::GetMKLDNNFormat; using platform::GetMKLDNNFormat;
using mkldnn::memory; using dnnl::memory;
using mkldnn::inner_product_forward; using dnnl::inner_product_forward;
using mkldnn::primitive; using dnnl::primitive;
using mkldnn::stream; using dnnl::stream;
using mkldnn::prop_kind; using dnnl::prop_kind;
template <typename T_in, typename T_w, typename T_out> template <typename T_in, typename T_w, typename T_out>
class FCPrimitiveFactory { class FCPrimitiveFactory {
public: public:
explicit FCPrimitiveFactory(const mkldnn::engine& engine) : engine_(engine) {} explicit FCPrimitiveFactory(const dnnl::engine& engine) : engine_(engine) {}
void ExecuteFcPrimitive(const LoDTensor* input, const Tensor* weights, void ExecuteFcPrimitive(const LoDTensor* input, const Tensor* weights,
const Tensor* bias, LoDTensor* output, const Tensor* bias, LoDTensor* output,
...@@ -89,8 +89,7 @@ class FCPrimitiveFactory { ...@@ -89,8 +89,7 @@ class FCPrimitiveFactory {
// descriptor has been divided into separate cases, based on the number // descriptor has been divided into separate cases, based on the number
// of input dimensions. // of input dimensions.
size_t input_dim_num = input->dims().size(); size_t input_dim_num = input->dims().size();
paddle::optional<mkldnn::inner_product_forward::primitive_desc> paddle::optional<dnnl::inner_product_forward::primitive_desc> fc_prim_desc;
fc_prim_desc;
memory::desc usr_weights_desc = {}; memory::desc usr_weights_desc = {};
switch (input_dim_num) { switch (input_dim_num) {
case 2: case 2:
...@@ -140,14 +139,14 @@ class FCPrimitiveFactory { ...@@ -140,14 +139,14 @@ class FCPrimitiveFactory {
void Execute() { void Execute() {
auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
if (bias_) { if (bias_) {
fc_->execute(astream, {{MKLDNN_ARG_SRC, *input_}, fc_->execute(astream, {{DNNL_ARG_SRC, *input_},
{MKLDNN_ARG_WEIGHTS, *weights_}, {DNNL_ARG_WEIGHTS, *weights_},
{MKLDNN_ARG_BIAS, *bias_}, {DNNL_ARG_BIAS, *bias_},
{MKLDNN_ARG_DST, *output_}}); {DNNL_ARG_DST, *output_}});
} else { } else {
fc_->execute(astream, {{MKLDNN_ARG_SRC, *input_}, fc_->execute(astream, {{DNNL_ARG_SRC, *input_},
{MKLDNN_ARG_WEIGHTS, *weights_}, {DNNL_ARG_WEIGHTS, *weights_},
{MKLDNN_ARG_DST, *output_}}); {DNNL_ARG_DST, *output_}});
} }
astream.wait(); astream.wait();
} }
...@@ -192,7 +191,7 @@ class FCPrimitiveFactory { ...@@ -192,7 +191,7 @@ class FCPrimitiveFactory {
} }
} }
mkldnn::inner_product_forward::primitive_desc Create2DFcPrimDescriptor( dnnl::inner_product_forward::primitive_desc Create2DFcPrimDescriptor(
const LoDTensor* input, const Tensor* weights, const Tensor* bias, const LoDTensor* input, const Tensor* weights, const Tensor* bias,
LoDTensor* output, const ExecutionContext& ctx) { LoDTensor* output, const ExecutionContext& ctx) {
auto src_desc = CreateMemDescriptor<T_in>(input, input->format()); auto src_desc = CreateMemDescriptor<T_in>(input, input->format());
...@@ -213,7 +212,7 @@ class FCPrimitiveFactory { ...@@ -213,7 +212,7 @@ class FCPrimitiveFactory {
memory::desc Create2DUserWeightsDesc() { return weights_->get_desc(); } memory::desc Create2DUserWeightsDesc() { return weights_->get_desc(); }
mkldnn::inner_product_forward::primitive_desc Create3DFcPrimDescriptor( dnnl::inner_product_forward::primitive_desc Create3DFcPrimDescriptor(
const LoDTensor* input, const Tensor* weights, const Tensor* bias, const LoDTensor* input, const Tensor* weights, const Tensor* bias,
LoDTensor* output, const ExecutionContext& ctx) { LoDTensor* output, const ExecutionContext& ctx) {
auto input_dims = framework::vectorize(input->dims()); auto input_dims = framework::vectorize(input->dims());
...@@ -244,7 +243,7 @@ class FCPrimitiveFactory { ...@@ -244,7 +243,7 @@ class FCPrimitiveFactory {
return CreateMemDescriptor<float>(dims, MKLDNNMemoryFormat::oiw); return CreateMemDescriptor<float>(dims, MKLDNNMemoryFormat::oiw);
} }
mkldnn::inner_product_forward::primitive_desc Create4DFcPrimDescriptor( dnnl::inner_product_forward::primitive_desc Create4DFcPrimDescriptor(
const LoDTensor* input, const Tensor* weights, const Tensor* bias, const LoDTensor* input, const Tensor* weights, const Tensor* bias,
LoDTensor* output, const ExecutionContext& ctx) { LoDTensor* output, const ExecutionContext& ctx) {
auto src_desc = CreateMemDescriptor<T_in>(input, input->format()); auto src_desc = CreateMemDescriptor<T_in>(input, input->format());
...@@ -274,13 +273,13 @@ class FCPrimitiveFactory { ...@@ -274,13 +273,13 @@ class FCPrimitiveFactory {
} }
// Convert data from one data format to another // Convert data from one data format to another
std::shared_ptr<mkldnn::memory> Reorder(const memory::desc& src_desc, std::shared_ptr<dnnl::memory> Reorder(const memory::desc& src_desc,
const memory::desc& dst_desc, const memory::desc& dst_desc,
void* src_data) { void* src_data) {
auto src_mem = memory(src_desc, engine_, src_data); auto src_mem = memory(src_desc, engine_, src_data);
auto dst_mem = std::make_shared<memory>(dst_desc, engine_); auto dst_mem = std::make_shared<memory>(dst_desc, engine_);
auto reorder = mkldnn::reorder(src_mem, *dst_mem); auto reorder = dnnl::reorder(src_mem, *dst_mem);
auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
{ {
...@@ -295,11 +294,11 @@ class FCPrimitiveFactory { ...@@ -295,11 +294,11 @@ class FCPrimitiveFactory {
// Convert data from one data format to another and rescale it. // Convert data from one data format to another and rescale it.
// If the desired data type is (un)signed int8, quantization occurs here. // If the desired data type is (un)signed int8, quantization occurs here.
std::shared_ptr<mkldnn::memory> ReorderWithScale( std::shared_ptr<dnnl::memory> ReorderWithScale(
const std::shared_ptr<memory> src_mem, const memory::desc& dst_md, const std::shared_ptr<memory> src_mem, const memory::desc& dst_md,
const std::vector<float>& scale_data) { const std::vector<float>& scale_data) {
auto dst_mem = std::make_shared<mkldnn::memory>(dst_md, engine_); auto dst_mem = std::make_shared<dnnl::memory>(dst_md, engine_);
mkldnn::primitive_attr attributes; dnnl::primitive_attr attributes;
// According to MKL-DNN's documentation, the mask determines along which // According to MKL-DNN's documentation, the mask determines along which
// dimensions the scale should be applied. // dimensions the scale should be applied.
// 0 - Single scale applied to whole tensor // 0 - Single scale applied to whole tensor
...@@ -308,14 +307,14 @@ class FCPrimitiveFactory { ...@@ -308,14 +307,14 @@ class FCPrimitiveFactory {
// because we perform per-output-channel quantization // because we perform per-output-channel quantization
int mask = CreateMask(0, scale_data.size() > 1); int mask = CreateMask(0, scale_data.size() > 1);
attributes.set_output_scales(mask, scale_data); attributes.set_output_scales(mask, scale_data);
auto reorder = mkldnn::reorder(*src_mem, *dst_mem, attributes); auto reorder = dnnl::reorder(*src_mem, *dst_mem, attributes);
auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
{ {
platform::RecordEvent record_reorder("int_reorder", platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp); platform::EventRole::kUniqueOp);
reorder.execute(astream, reorder.execute(astream,
{{MKLDNN_ARG_FROM, *src_mem}, {MKLDNN_ARG_TO, *dst_mem}}); {{DNNL_ARG_FROM, *src_mem}, {DNNL_ARG_TO, *dst_mem}});
astream.wait(); astream.wait();
} }
...@@ -323,43 +322,43 @@ class FCPrimitiveFactory { ...@@ -323,43 +322,43 @@ class FCPrimitiveFactory {
} }
template <typename T> template <typename T>
static mkldnn::memory::desc CreateMemDescriptor( static dnnl::memory::desc CreateMemDescriptor(
const std::vector<int64_t>& dims, MKLDNNMemoryFormat format) { const std::vector<int64_t>& dims, MKLDNNMemoryFormat format) {
return platform::MKLDNNMemDesc(dims, platform::MKLDNNGetDataType<T>(), return platform::MKLDNNMemDesc(dims, platform::MKLDNNGetDataType<T>(),
format); format);
} }
template <typename T> template <typename T>
static mkldnn::memory::desc CreateMemDescriptor(const Tensor* tensor, static dnnl::memory::desc CreateMemDescriptor(const Tensor* tensor,
MKLDNNMemoryFormat format) { MKLDNNMemoryFormat format) {
auto dims = framework::vectorize(tensor->dims()); auto dims = framework::vectorize(tensor->dims());
return CreateMemDescriptor<T>(dims, format); return CreateMemDescriptor<T>(dims, format);
} }
template <typename T> template <typename T>
mkldnn::memory CreateMemory(const mkldnn::memory::desc& desc, dnnl::memory CreateMemory(const dnnl::memory::desc& desc,
const Tensor* tensor) { const Tensor* tensor) {
return CreateMemory(desc, platform::to_void_cast<T>(tensor->data<T>())); return CreateMemory(desc, platform::to_void_cast<T>(tensor->data<T>()));
} }
mkldnn::memory CreateMemory(const mkldnn::memory::desc& desc, void* data) { dnnl::memory CreateMemory(const dnnl::memory::desc& desc, void* data) {
return memory(desc, engine_, data); return memory(desc, engine_, data);
} }
template <typename T> template <typename T>
std::shared_ptr<mkldnn::memory> CreateMemoryToBeCached( std::shared_ptr<dnnl::memory> CreateMemoryToBeCached(
const mkldnn::memory::desc& desc, const Tensor* tensor) { const dnnl::memory::desc& desc, const Tensor* tensor) {
return CreateMemoryToBeCached(desc, return CreateMemoryToBeCached(desc,
platform::to_void_cast<T>(tensor->data<T>())); platform::to_void_cast<T>(tensor->data<T>()));
} }
std::shared_ptr<mkldnn::memory> CreateMemoryToBeCached( std::shared_ptr<dnnl::memory> CreateMemoryToBeCached(
const mkldnn::memory::desc& desc, void* data) { const dnnl::memory::desc& desc, void* data) {
return std::make_shared<memory>(desc, engine_, data); return std::make_shared<memory>(desc, engine_, data);
} }
// Create weights memory and transform to default MKL-DNN format // Create weights memory and transform to default MKL-DNN format
std::shared_ptr<mkldnn::memory> CreateWeightsMemory(const Tensor* weights) { std::shared_ptr<dnnl::memory> CreateWeightsMemory(const Tensor* weights) {
auto dims = framework::vectorize(weights->dims()); auto dims = framework::vectorize(weights->dims());
std::swap(dims[0], dims[1]); // Correct output dimensions std::swap(dims[0], dims[1]); // Correct output dimensions
auto src_desc = CreateMemDescriptor<float>(dims, MKLDNNMemoryFormat::io); auto src_desc = CreateMemDescriptor<float>(dims, MKLDNNMemoryFormat::io);
...@@ -446,9 +445,9 @@ class FCPrimitiveFactory { ...@@ -446,9 +445,9 @@ class FCPrimitiveFactory {
} }
// Fuse relu into FC with activation type attribute has been set to 'relu' // Fuse relu into FC with activation type attribute has been set to 'relu'
mkldnn::primitive_attr CreatePostOps(const ExecutionContext& ctx) { dnnl::primitive_attr CreatePostOps(const ExecutionContext& ctx) {
mkldnn::primitive_attr attributes; dnnl::primitive_attr attributes;
mkldnn::post_ops post_operations; dnnl::post_ops post_operations;
auto output_shift_scale = ComputeOutputShiftScale(ctx); auto output_shift_scale = ComputeOutputShiftScale(ctx);
int mask = CreateMask(1, output_shift_scale.size() > 1); int mask = CreateMask(1, output_shift_scale.size() > 1);
...@@ -458,56 +457,55 @@ class FCPrimitiveFactory { ...@@ -458,56 +457,55 @@ class FCPrimitiveFactory {
constexpr float scale = 1.0f; constexpr float scale = 1.0f;
constexpr float negative_slope = 0.0f; constexpr float negative_slope = 0.0f;
constexpr float placeholder = 1.0f; // beta constexpr float placeholder = 1.0f; // beta
post_operations.append_eltwise(scale, mkldnn::algorithm::eltwise_relu, post_operations.append_eltwise(scale, dnnl::algorithm::eltwise_relu,
negative_slope, placeholder); negative_slope, placeholder);
} else if (ctx.Attr<std::string>("activation_type") == "gelu") { } else if (ctx.Attr<std::string>("activation_type") == "gelu") {
constexpr float scale = 1.0f; constexpr float scale = 1.0f;
constexpr float alpha = 0.0f; constexpr float alpha = 0.0f;
constexpr float beta = 0.0f; constexpr float beta = 0.0f;
post_operations.append_eltwise(scale, mkldnn::algorithm::eltwise_gelu, post_operations.append_eltwise(scale, dnnl::algorithm::eltwise_gelu,
alpha, beta); alpha, beta);
} else if (ctx.Attr<std::string>("activation_type") == "gelu_tanh") { } else if (ctx.Attr<std::string>("activation_type") == "gelu_tanh") {
constexpr float scale = 1.0f; constexpr float scale = 1.0f;
constexpr float alpha = 0.0f; constexpr float alpha = 0.0f;
constexpr float beta = 0.0f; constexpr float beta = 0.0f;
post_operations.append_eltwise( post_operations.append_eltwise(scale, dnnl::algorithm::eltwise_gelu_tanh,
scale, mkldnn::algorithm::eltwise_gelu_tanh, alpha, beta); alpha, beta);
} else if (ctx.Attr<std::string>("activation_type") == "gelu_erf") { } else if (ctx.Attr<std::string>("activation_type") == "gelu_erf") {
constexpr float scale = 1.0f; constexpr float scale = 1.0f;
constexpr float alpha = 0.0f; constexpr float alpha = 0.0f;
constexpr float beta = 0.0f; constexpr float beta = 0.0f;
post_operations.append_eltwise(scale, mkldnn::algorithm::eltwise_gelu_erf, post_operations.append_eltwise(scale, dnnl::algorithm::eltwise_gelu_erf,
alpha, beta); alpha, beta);
} else if (ctx.Attr<std::string>("activation_type") == "tanh") { } else if (ctx.Attr<std::string>("activation_type") == "tanh") {
constexpr float scale = 1.0f; constexpr float scale = 1.0f;
constexpr float alpha = 0.0f; constexpr float alpha = 0.0f;
constexpr float beta = 0.0f; constexpr float beta = 0.0f;
post_operations.append_eltwise(scale, mkldnn::algorithm::eltwise_tanh, post_operations.append_eltwise(scale, dnnl::algorithm::eltwise_tanh,
alpha, beta); alpha, beta);
} else if (ctx.Attr<std::string>("activation_type") == "sigmoid") { } else if (ctx.Attr<std::string>("activation_type") == "sigmoid") {
constexpr float scale = 1.0f; constexpr float scale = 1.0f;
constexpr float alpha = 0.0f; constexpr float alpha = 0.0f;
constexpr float beta = 0.0f; constexpr float beta = 0.0f;
post_operations.append_eltwise(scale, mkldnn::algorithm::eltwise_logistic, post_operations.append_eltwise(scale, dnnl::algorithm::eltwise_logistic,
alpha, beta); alpha, beta);
} else if (ctx.Attr<std::string>("activation_type") == "hard_swish") { } else if (ctx.Attr<std::string>("activation_type") == "hard_swish") {
constexpr float scale = 1.0f; constexpr float scale = 1.0f;
constexpr float alpha = 0.0f; constexpr float alpha = 0.0f;
constexpr float beta = 0.0f; constexpr float beta = 0.0f;
post_operations.append_eltwise( post_operations.append_eltwise(scale, dnnl::algorithm::eltwise_hardswish,
scale, mkldnn::algorithm::eltwise_hardswish, alpha, beta); alpha, beta);
} }
attributes.set_post_ops(post_operations); attributes.set_post_ops(post_operations);
return attributes; return attributes;
} }
mkldnn::inner_product_forward::primitive_desc CreateFcPrimDesc( dnnl::inner_product_forward::primitive_desc CreateFcPrimDesc(
const mkldnn::memory::desc& input_desc, const dnnl::memory::desc& input_desc,
const mkldnn::memory::desc& weights_desc, const dnnl::memory::desc& weights_desc,
const mkldnn::memory::desc& bias_desc, const dnnl::memory::desc& bias_desc, const dnnl::memory::desc& dst_desc,
const mkldnn::memory::desc& dst_desc, const dnnl::primitive_attr& attrs) {
const mkldnn::primitive_attr& attrs) {
auto fc_desc = auto fc_desc =
inner_product_forward::desc(prop_kind::forward_scoring, input_desc, inner_product_forward::desc(prop_kind::forward_scoring, input_desc,
weights_desc, bias_desc, dst_desc); weights_desc, bias_desc, dst_desc);
...@@ -517,8 +515,8 @@ class FCPrimitiveFactory { ...@@ -517,8 +515,8 @@ class FCPrimitiveFactory {
// Create output memory based on output tensor and inner_product // Create output memory based on output tensor and inner_product
// primitive descriptor format chosen for output // primitive descriptor format chosen for output
mkldnn::memory CreateDstMemory( dnnl::memory CreateDstMemory(
const mkldnn::inner_product_forward::primitive_desc& fc_prim_desc, const dnnl::inner_product_forward::primitive_desc& fc_prim_desc,
const ExecutionContext& ctx, Tensor* output) { const ExecutionContext& ctx, Tensor* output) {
auto dst_desc = fc_prim_desc.dst_desc(); auto dst_desc = fc_prim_desc.dst_desc();
auto buffer_size = dst_desc.get_size(); auto buffer_size = dst_desc.get_size();
...@@ -545,7 +543,7 @@ class FCPrimitiveFactory { ...@@ -545,7 +543,7 @@ class FCPrimitiveFactory {
} }
private: private:
const mkldnn::engine& engine_; const dnnl::engine& engine_;
paddle::optional<memory> input_; paddle::optional<memory> input_;
paddle::optional<memory> output_; paddle::optional<memory> output_;
std::shared_ptr<memory> bias_; std::shared_ptr<memory> bias_;
......
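ReorderWithScale and the mask comments above rely on oneDNN output scales, where bit i of the mask selects dimension i of the tensor being scaled. A standalone sketch of a per-output-channel f32-to-s8 weights reorder under that convention; the engine, sizes and scale values are placeholders, not quantization parameters from this PR:

#include <cstdint>
#include <vector>
#include "dnnl.hpp"

int main() {
  dnnl::engine eng(dnnl::engine::kind::cpu, 0);
  dnnl::stream strm(eng);

  // Placeholder 4x8 "weights" tensor: 4 output channels, 8 input channels.
  dnnl::memory::dims dims = {4, 8};
  auto src_md = dnnl::memory::desc(dims, dnnl::memory::data_type::f32,
                                   dnnl::memory::format_tag::oi);
  auto dst_md = dnnl::memory::desc(dims, dnnl::memory::data_type::s8,
                                   dnnl::memory::format_tag::oi);

  std::vector<float> src(4 * 8, 0.25f);
  std::vector<int8_t> dst(4 * 8, 0);
  dnnl::memory src_mem(src_md, eng, src.data());
  dnnl::memory dst_mem(dst_md, eng, dst.data());

  // Bit i of the mask selects dimension i: mask 0 means one scale for the
  // whole tensor, mask 1 << 0 means one scale per output channel here.
  std::vector<float> scales(4, 127.0f);  // placeholder scales
  dnnl::primitive_attr attr;
  attr.set_output_scales(1 << 0, scales);

  auto reorder_pd = dnnl::reorder::primitive_desc(src_mem, dst_mem, attr);
  dnnl::reorder(reorder_pd)
      .execute(strm, {{DNNL_ARG_FROM, src_mem}, {DNNL_ARG_TO, dst_mem}});
  strm.wait();
  return 0;
}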
...@@ -22,15 +22,15 @@ using paddle::platform::MKLDNNDeviceContext; ...@@ -22,15 +22,15 @@ using paddle::platform::MKLDNNDeviceContext;
template <typename T> template <typename T>
class LRNMKLDNNHandler class LRNMKLDNNHandler
: public platform::MKLDNNHandlerNoCachingT<T, mkldnn::lrn_forward, : public platform::MKLDNNHandlerNoCachingT<T, dnnl::lrn_forward,
mkldnn::lrn_backward> { dnnl::lrn_backward> {
public: public:
LRNMKLDNNHandler(const framework::ExecutionContext& ctx, LRNMKLDNNHandler(const framework::ExecutionContext& ctx,
const mkldnn::engine mkldnn_engine, const dnnl::engine mkldnn_engine, platform::Place cpu_place,
platform::Place cpu_place, const Tensor* input) const Tensor* input)
: platform::MKLDNNHandlerNoCachingT<T, mkldnn::lrn_forward, : platform::MKLDNNHandlerNoCachingT<T, dnnl::lrn_forward,
mkldnn::lrn_backward>(mkldnn_engine, dnnl::lrn_backward>(mkldnn_engine,
cpu_place) { cpu_place) {
const int n = ctx.Attr<int>("n"); const int n = ctx.Attr<int>("n");
// MKL-DNN implements LRN in a caffe way: // MKL-DNN implements LRN in a caffe way:
...@@ -46,21 +46,21 @@ class LRNMKLDNNHandler ...@@ -46,21 +46,21 @@ class LRNMKLDNNHandler
auto dims = framework::vectorize(input->dims()); auto dims = framework::vectorize(input->dims());
auto src_md = mkldnn::memory::desc(dims, platform::MKLDNNGetDataType<T>(), auto src_md = dnnl::memory::desc(dims, platform::MKLDNNGetDataType<T>(),
input->format()); input->format());
this->AcquireForwardPrimitiveDescriptor( this->AcquireForwardPrimitiveDescriptor(
is_test ? mkldnn::prop_kind::forward_inference is_test ? dnnl::prop_kind::forward_inference
: mkldnn::prop_kind::forward_training, : dnnl::prop_kind::forward_training,
mkldnn::algorithm::lrn_across_channels, src_md, n, alpha, beta, k); dnnl::algorithm::lrn_across_channels, src_md, n, alpha, beta, k);
} }
LRNMKLDNNHandler(const framework::ExecutionContext& ctx, LRNMKLDNNHandler(const framework::ExecutionContext& ctx,
const mkldnn::engine mkldnn_engine, const dnnl::engine mkldnn_engine, platform::Place cpu_place,
platform::Place cpu_place, const Tensor* in_x, const Tensor* in_x, const Tensor* out_grad,
const Tensor* out_grad, Tensor* in_x_grad) Tensor* in_x_grad)
: platform::MKLDNNHandlerNoCachingT<T, mkldnn::lrn_forward, : platform::MKLDNNHandlerNoCachingT<T, dnnl::lrn_forward,
mkldnn::lrn_backward>(mkldnn_engine, dnnl::lrn_backward>(mkldnn_engine,
cpu_place) { cpu_place) {
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
ctx.Attr<bool>("is_test"), false, ctx.Attr<bool>("is_test"), false,
...@@ -74,28 +74,28 @@ class LRNMKLDNNHandler ...@@ -74,28 +74,28 @@ class LRNMKLDNNHandler
auto dims = framework::vectorize<int64_t>(in_x->dims()); auto dims = framework::vectorize<int64_t>(in_x->dims());
auto src_md = mkldnn::memory::desc(dims, platform::MKLDNNGetDataType<T>(), auto src_md = dnnl::memory::desc(dims, platform::MKLDNNGetDataType<T>(),
in_x->format()); in_x->format());
auto diff_md = mkldnn::memory::desc(dims, platform::MKLDNNGetDataType<T>(), auto diff_md = dnnl::memory::desc(dims, platform::MKLDNNGetDataType<T>(),
out_grad->format()); out_grad->format());
this->AcquireForwardPrimitiveDescriptor( this->AcquireForwardPrimitiveDescriptor(
mkldnn::prop_kind::forward_training, dnnl::prop_kind::forward_training, dnnl::algorithm::lrn_across_channels,
mkldnn::algorithm::lrn_across_channels, src_md, n, alpha, beta, k); src_md, n, alpha, beta, k);
this->AcquireBackwardPrimitiveDescriptor( this->AcquireBackwardPrimitiveDescriptor(
mkldnn::algorithm::lrn_across_channels, src_md, diff_md, n, alpha, beta, dnnl::algorithm::lrn_across_channels, src_md, diff_md, n, alpha, beta,
k); k);
} }
std::shared_ptr<mkldnn::memory> AcquireWorkspaceMemory(Tensor* workspace) { std::shared_ptr<dnnl::memory> AcquireWorkspaceMemory(Tensor* workspace) {
T* ptr = workspace->mutable_data<T>( T* ptr = workspace->mutable_data<T>(
this->place_, this->fwd_pd_->workspace_desc().get_size()); this->place_, this->fwd_pd_->workspace_desc().get_size());
return this->AcquireMemoryFromPrimitive(this->fwd_pd_->workspace_desc(), return this->AcquireMemoryFromPrimitive(this->fwd_pd_->workspace_desc(),
ptr); ptr);
} }
std::shared_ptr<mkldnn::memory> AcquireBackwardWorkspaceMemory( std::shared_ptr<dnnl::memory> AcquireBackwardWorkspaceMemory(
const Tensor* workspace) { const Tensor* workspace) {
const T* workspace_data = workspace->data<T>(); const T* workspace_data = workspace->data<T>();
return this->AcquireMemoryFromPrimitive( return this->AcquireMemoryFromPrimitive(
...@@ -136,12 +136,12 @@ class LRNMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -136,12 +136,12 @@ class LRNMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
if (!workspace_memory->get_desc().is_zero()) { if (!workspace_memory->get_desc().is_zero()) {
mid->set_format(platform::GetMKLDNNFormat(*workspace_memory)); mid->set_format(platform::GetMKLDNNFormat(*workspace_memory));
lrn_p->execute(astream, {{MKLDNN_ARG_SRC, *src_memory}, lrn_p->execute(astream, {{DNNL_ARG_SRC, *src_memory},
{MKLDNN_ARG_DST, *dst_memory}, {DNNL_ARG_DST, *dst_memory},
{MKLDNN_ARG_WORKSPACE, *workspace_memory}}); {DNNL_ARG_WORKSPACE, *workspace_memory}});
} else { } else {
lrn_p->execute(astream, {{MKLDNN_ARG_SRC, *src_memory}, lrn_p->execute(
{MKLDNN_ARG_DST, *dst_memory}}); astream, {{DNNL_ARG_SRC, *src_memory}, {DNNL_ARG_DST, *dst_memory}});
} }
astream.wait(); astream.wait();
...@@ -182,10 +182,10 @@ class LRNMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> { ...@@ -182,10 +182,10 @@ class LRNMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
auto lrn_bwd = handler.AcquireBackwardPrimitive(); auto lrn_bwd = handler.AcquireBackwardPrimitive();
auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
lrn_bwd->execute(astream, {{MKLDNN_ARG_SRC, *src_memory}, lrn_bwd->execute(astream, {{DNNL_ARG_SRC, *src_memory},
{MKLDNN_ARG_DIFF_DST, *diff_dst_memory}, {DNNL_ARG_DIFF_DST, *diff_dst_memory},
{MKLDNN_ARG_DIFF_SRC, *diff_src_memory}, {DNNL_ARG_DIFF_SRC, *diff_src_memory},
{MKLDNN_ARG_WORKSPACE, *workspace}}); {DNNL_ARG_WORKSPACE, *workspace}});
astream.wait(); astream.wait();
in_x_grad->set_layout(framework::DataLayout::kMKLDNN); in_x_grad->set_layout(framework::DataLayout::kMKLDNN);
......
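The forward LRN handler above acquires an across-channel lrn_forward descriptor from (n, alpha, beta, k). A minimal sketch of that descriptor built directly against oneDNN, with a placeholder shape and hyper-parameters rather than Paddle's attribute values:

#include "dnnl.hpp"

// Illustrative only: an across-channel LRN forward descriptor over an
// arbitrary nchw f32 tensor; n/alpha/beta/k are placeholders.
dnnl::lrn_forward::primitive_desc MakeLrnPd(const dnnl::engine& eng) {
  auto src_md = dnnl::memory::desc({1, 16, 8, 8}, dnnl::memory::data_type::f32,
                                   dnnl::memory::format_tag::nchw);
  const int n = 5;  // local size
  const float alpha = 1e-4f, beta = 0.75f, k = 1.0f;
  auto desc = dnnl::lrn_forward::desc(dnnl::prop_kind::forward_inference,
                                      dnnl::algorithm::lrn_across_channels,
                                      src_md, n, alpha, beta, k);
  return dnnl::lrn_forward::primitive_desc(desc, eng);
}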
...@@ -108,7 +108,7 @@ template <typename XT, typename YT, typename OT> ...@@ -108,7 +108,7 @@ template <typename XT, typename YT, typename OT>
class MatMulMKLDNNHandler class MatMulMKLDNNHandler
: public paddle::platform::MKLDNNHandlerNoCachingT<XT, dnnl::matmul> { : public paddle::platform::MKLDNNHandlerNoCachingT<XT, dnnl::matmul> {
public: public:
MatMulMKLDNNHandler(const mkldnn::engine engine, MatMulMKLDNNHandler(const dnnl::engine engine,
paddle::platform::Place cpu_place, Tensor* x, paddle::platform::Place cpu_place, Tensor* x,
bool trans_x, Tensor* y, bool trans_y, Tensor* out, bool trans_x, Tensor* y, bool trans_y, Tensor* out,
float scale) float scale)
...@@ -148,7 +148,7 @@ class MatMulMKLDNNHandler ...@@ -148,7 +148,7 @@ class MatMulMKLDNNHandler
this->AcquireForwardPrimitiveDescriptor(attrs, x_md, y_md, out_md); this->AcquireForwardPrimitiveDescriptor(attrs, x_md, y_md, out_md);
} }
// Constructor for FWD MatMul // Constructor for FWD MatMul
MatMulMKLDNNHandler(const mkldnn::engine engine, const ExecutionContext& ctx, MatMulMKLDNNHandler(const dnnl::engine engine, const ExecutionContext& ctx,
float scale) float scale)
: paddle::platform::MKLDNNHandlerNoCachingT<XT, dnnl::matmul>( : paddle::platform::MKLDNNHandlerNoCachingT<XT, dnnl::matmul>(
engine, ctx.GetPlace()), engine, ctx.GetPlace()),
...@@ -202,9 +202,9 @@ class MatMulMKLDNNHandler ...@@ -202,9 +202,9 @@ class MatMulMKLDNNHandler
weights_memory_p->set_data_handle(y_ptr); weights_memory_p->set_data_handle(y_ptr);
dst_memory_p->set_data_handle(out_ptr); dst_memory_p->set_data_handle(out_ptr);
matmul_p->execute(astream, { matmul_p->execute(astream, {
{MKLDNN_ARG_SRC, *src_memory_p}, {DNNL_ARG_SRC, *src_memory_p},
{MKLDNN_ARG_WEIGHTS, *weights_memory_p}, {DNNL_ARG_WEIGHTS, *weights_memory_p},
{MKLDNN_ARG_DST, *dst_memory_p}, {DNNL_ARG_DST, *dst_memory_p},
}); });
x_ptr = static_cast<char*>(x_ptr) + std::get<0>(offsets); x_ptr = static_cast<char*>(x_ptr) + std::get<0>(offsets);
y_ptr = static_cast<char*>(y_ptr) + std::get<1>(offsets); y_ptr = static_cast<char*>(y_ptr) + std::get<1>(offsets);
...@@ -218,7 +218,7 @@ class MatMulMKLDNNHandler ...@@ -218,7 +218,7 @@ class MatMulMKLDNNHandler
out->set_layout(DataLayout::kMKLDNN); out->set_layout(DataLayout::kMKLDNN);
} }
std::shared_ptr<mkldnn::memory> AcquireDstMemory( std::shared_ptr<dnnl::memory> AcquireDstMemory(
paddle::framework::Tensor* output) { paddle::framework::Tensor* output) {
// We cannot use base AcquireDstMemory as it makes an allocation request // We cannot use base AcquireDstMemory as it makes an allocation request
// based on DST memory primitive size. This is fine in general, but in MatMul // based on DST memory primitive size. This is fine in general, but in MatMul
...@@ -548,7 +548,7 @@ void MatMulGradMKLDNNKernel<T>::Compute(const ExecutionContext& ctx) const { ...@@ -548,7 +548,7 @@ void MatMulGradMKLDNNKernel<T>::Compute(const ExecutionContext& ctx) const {
template <typename T> template <typename T>
void MatMulGradMKLDNNKernel<T>::ExecuteMatMulGrad( void MatMulGradMKLDNNKernel<T>::ExecuteMatMulGrad(
const ExecutionContext& ctx, const MKLDNNDeviceContext& dev_ctx, const ExecutionContext& ctx, const MKLDNNDeviceContext& dev_ctx,
const mkldnn::engine& engine, Tensor* x, bool trans_x, const dnnl::engine& engine, Tensor* x, bool trans_x,
bool is_fold_init_dims_x, Tensor* y, bool trans_y, bool is_fold_init_dims_y, bool is_fold_init_dims_x, Tensor* y, bool trans_y, bool is_fold_init_dims_y,
Tensor* out) const { Tensor* out) const {
// gradient is calculated in a different way when broadcasting is used // gradient is calculated in a different way when broadcasting is used
......
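The matmul handler above feeds the primitive through the renamed DNNL_ARG_SRC / DNNL_ARG_WEIGHTS / DNNL_ARG_DST keys. A self-contained sketch of a single 2-D f32 matmul executed the same way; shapes and data are arbitrary examples, not values taken from the operator:

#include <vector>
#include "dnnl.hpp"

int main() {
  using dt = dnnl::memory::data_type;
  using tag = dnnl::memory::format_tag;
  dnnl::engine eng(dnnl::engine::kind::cpu, 0);
  dnnl::stream strm(eng);

  // Placeholder 2x3 * 3x4 f32 matmul.
  auto x_md = dnnl::memory::desc({2, 3}, dt::f32, tag::ab);
  auto y_md = dnnl::memory::desc({3, 4}, dt::f32, tag::ab);
  auto out_md = dnnl::memory::desc({2, 4}, dt::f32, tag::ab);

  auto matmul_pd = dnnl::matmul::primitive_desc(
      dnnl::matmul::desc(x_md, y_md, out_md), eng);

  std::vector<float> x(2 * 3, 1.0f), y(3 * 4, 1.0f), out(2 * 4, 0.0f);
  dnnl::memory x_mem(x_md, eng, x.data());
  dnnl::memory y_mem(y_md, eng, y.data());
  dnnl::memory out_mem(out_md, eng, out.data());

  dnnl::matmul(matmul_pd).execute(strm, {{DNNL_ARG_SRC, x_mem},
                                         {DNNL_ARG_WEIGHTS, y_mem},
                                         {DNNL_ARG_DST, out_mem}});
  strm.wait();
  return 0;
}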
...@@ -33,18 +33,17 @@ using framework::DataLayout; ...@@ -33,18 +33,17 @@ using framework::DataLayout;
using framework::DDim; using framework::DDim;
using framework::ExecutionContext; using framework::ExecutionContext;
using framework::Tensor; using framework::Tensor;
using mkldnn::inner_product_forward; using dnnl::inner_product_forward;
using mkldnn::memory; using dnnl::memory;
using mkldnn::prop_kind; using dnnl::prop_kind;
using mkldnn::stream; using dnnl::stream;
using platform::MKLDNNDeviceContext; using platform::MKLDNNDeviceContext;
using platform::to_void_cast; using platform::to_void_cast;
template <typename XT, typename YT, typename OT> template <typename XT, typename YT, typename OT>
class MulPrimitiveFactory { class MulPrimitiveFactory {
public: public:
explicit MulPrimitiveFactory(const mkldnn::engine &engine) explicit MulPrimitiveFactory(const dnnl::engine &engine) : engine_(engine) {}
: engine_(engine) {}
inner_product_forward CreateMulPrimitive(const Tensor *x_input, inner_product_forward CreateMulPrimitive(const Tensor *x_input,
const Tensor *y_input, const Tensor *y_input,
...@@ -99,15 +98,15 @@ class MulPrimitiveFactory { ...@@ -99,15 +98,15 @@ class MulPrimitiveFactory {
const memory::desc &dst_desc, void *src_data, const memory::desc &dst_desc, void *src_data,
const std::vector<float> &scale) { const std::vector<float> &scale) {
auto mask = scale.size() > 1 ? 1 : 0; auto mask = scale.size() > 1 ? 1 : 0;
mkldnn::primitive_attr attr; dnnl::primitive_attr attr;
attr.set_output_scales(mask, scale); attr.set_output_scales(mask, scale);
auto src_mem = memory(src_desc, engine_, src_data); auto src_mem = memory(src_desc, engine_, src_data);
auto dst_mem = memory(dst_desc, engine_); auto dst_mem = memory(dst_desc, engine_);
auto reorder_pd = mkldnn::reorder::primitive_desc(src_mem, dst_mem, attr); auto reorder_pd = dnnl::reorder::primitive_desc(src_mem, dst_mem, attr);
auto reorder = mkldnn::reorder(reorder_pd); auto reorder = dnnl::reorder(reorder_pd);
auto &astream = platform::MKLDNNDeviceContext::tls().get_stream(); auto &astream = platform::MKLDNNDeviceContext::tls().get_stream();
{ {
...@@ -132,9 +131,9 @@ class MulPrimitiveFactory { ...@@ -132,9 +131,9 @@ class MulPrimitiveFactory {
scale_y); scale_y);
} }
mkldnn::primitive_attr CreateMulAttr(const ExecutionContext &ctx, dnnl::primitive_attr CreateMulAttr(const ExecutionContext &ctx,
bool force_fp32_output) { bool force_fp32_output) {
mkldnn::primitive_attr mul_attr; dnnl::primitive_attr mul_attr;
auto scale_y_data = ctx.Attr<std::vector<float>>("scale_y"); auto scale_y_data = ctx.Attr<std::vector<float>>("scale_y");
auto scale_x_data = ctx.Attr<float>("scale_x"); auto scale_x_data = ctx.Attr<float>("scale_x");
...@@ -185,9 +184,9 @@ class MulPrimitiveFactory { ...@@ -185,9 +184,9 @@ class MulPrimitiveFactory {
void Execute() { void Execute() {
auto &astream = platform::MKLDNNDeviceContext::tls().get_stream(); auto &astream = platform::MKLDNNDeviceContext::tls().get_stream();
(*mul_).execute(astream, {{MKLDNN_ARG_SRC, *x_input_}, (*mul_).execute(astream, {{DNNL_ARG_SRC, *x_input_},
{MKLDNN_ARG_WEIGHTS, *y_input_}, {DNNL_ARG_WEIGHTS, *y_input_},
{MKLDNN_ARG_DST, *output_}}); {DNNL_ARG_DST, *output_}});
astream.wait(); astream.wait();
} }
...@@ -268,7 +267,7 @@ class MulPrimitiveFactory { ...@@ -268,7 +267,7 @@ class MulPrimitiveFactory {
auto dst_mem = dst_data ? memory(dst_desc, engine_, dst_data) auto dst_mem = dst_data ? memory(dst_desc, engine_, dst_data)
: memory(dst_desc, engine_); : memory(dst_desc, engine_);
auto reorder = mkldnn::reorder(src_mem, dst_mem); auto reorder = dnnl::reorder(src_mem, dst_mem);
auto &astream = platform::MKLDNNDeviceContext::tls().get_stream(); auto &astream = platform::MKLDNNDeviceContext::tls().get_stream();
{ {
...@@ -289,7 +288,7 @@ class MulPrimitiveFactory { ...@@ -289,7 +288,7 @@ class MulPrimitiveFactory {
return Reorder(src_desc, dst_desc, to_void_cast<YT>(input_y->data<YT>())); return Reorder(src_desc, dst_desc, to_void_cast<YT>(input_y->data<YT>()));
} }
const mkldnn::engine &engine_; const dnnl::engine &engine_;
paddle::optional<memory> x_input_; paddle::optional<memory> x_input_;
paddle::optional<memory> y_input_; paddle::optional<memory> y_input_;
paddle::optional<memory> output_; paddle::optional<memory> output_;
...@@ -303,7 +302,7 @@ template <typename XT, typename YT, typename OT> ...@@ -303,7 +302,7 @@ template <typename XT, typename YT, typename OT>
std::shared_ptr<MulPrimitiveFactory<XT, YT, OT>> GetPrimitiveFactory( std::shared_ptr<MulPrimitiveFactory<XT, YT, OT>> GetPrimitiveFactory(
const MKLDNNDeviceContext &dev_ctx, const ExecutionContext &ctx, const MKLDNNDeviceContext &dev_ctx, const ExecutionContext &ctx,
const Tensor *input_x, const Tensor *input_y, const Tensor *input_x, const Tensor *input_y,
const mkldnn::engine &mkldnn_engine) { const dnnl::engine &mkldnn_engine) {
std::string key = platform::CreateKey( std::string key = platform::CreateKey(
dev_ctx, input_x->type(), framework::vectorize(input_x->dims()), dev_ctx, input_x->type(), framework::vectorize(input_x->dims()),
input_y->type(), framework::vectorize(input_y->dims()), input_y->type(), framework::vectorize(input_y->dims()),
...@@ -327,7 +326,7 @@ inner_product_forward GetMulPrimitive(const MKLDNNDeviceContext &dev_ctx, ...@@ -327,7 +326,7 @@ inner_product_forward GetMulPrimitive(const MKLDNNDeviceContext &dev_ctx,
const ExecutionContext &ctx, const ExecutionContext &ctx,
const Tensor *input_x, const Tensor *input_x,
const Tensor *input_y, Tensor *output, const Tensor *input_y, Tensor *output,
const mkldnn::engine &mkldnn_engine) { const dnnl::engine &mkldnn_engine) {
constexpr bool is_int8 = constexpr bool is_int8 =
std::is_same<XT, int8_t>::value || std::is_same<XT, uint8_t>::value; std::is_same<XT, int8_t>::value || std::is_same<XT, uint8_t>::value;
bool force_fp32_output = ctx.Attr<bool>("force_fp32_output"); bool force_fp32_output = ctx.Attr<bool>("force_fp32_output");
......
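The mul primitive factory above maps the multiply onto dnnl::inner_product_forward and executes it with src, weights and dst arguments only. A minimal bias-free sketch of that mapping, assuming forward_inference and placeholder sizes (the prop_kind and shapes are illustrative, not what the kernel actually selects):

#include <vector>
#include "dnnl.hpp"

int main() {
  using dt = dnnl::memory::data_type;
  using tag = dnnl::memory::format_tag;
  dnnl::engine eng(dnnl::engine::kind::cpu, 0);
  dnnl::stream strm(eng);

  const int N = 2, IC = 8, OC = 4;  // placeholder sizes
  auto src_md = dnnl::memory::desc({N, IC}, dt::f32, tag::nc);
  auto wei_md = dnnl::memory::desc({OC, IC}, dt::f32, tag::oi);
  auto dst_md = dnnl::memory::desc({N, OC}, dt::f32, tag::nc);

  auto fc_pd = dnnl::inner_product_forward::primitive_desc(
      dnnl::inner_product_forward::desc(dnnl::prop_kind::forward_inference,
                                        src_md, wei_md, dst_md),
      eng);

  std::vector<float> src(N * IC, 1.0f), wei(OC * IC, 1.0f), dst(N * OC, 0.0f);
  dnnl::memory src_mem(src_md, eng, src.data());
  dnnl::memory wei_mem(wei_md, eng, wei.data());
  dnnl::memory dst_mem(dst_md, eng, dst.data());

  dnnl::inner_product_forward(fc_pd).execute(strm,
                                             {{DNNL_ARG_SRC, src_mem},
                                              {DNNL_ARG_WEIGHTS, wei_mem},
                                              {DNNL_ARG_DST, dst_mem}});
  strm.wait();
  return 0;
}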
...@@ -20,24 +20,24 @@ namespace paddle { ...@@ -20,24 +20,24 @@ namespace paddle {
namespace operators { namespace operators {
using framework::DataLayout; using framework::DataLayout;
using mkldnn::memory; using dnnl::memory;
using mkldnn::pooling_backward; using dnnl::pooling_backward;
using mkldnn::pooling_forward; using dnnl::pooling_forward;
using mkldnn::primitive; using dnnl::primitive;
using mkldnn::reorder; using dnnl::reorder;
using mkldnn::stream; using dnnl::stream;
using platform::to_void_cast; using platform::to_void_cast;
template <typename T> template <typename T>
class PoolingMKLDNNHandler class PoolingMKLDNNHandler
: public platform::MKLDNNHandlerNoCachingT<T, mkldnn::pooling_forward, : public platform::MKLDNNHandlerNoCachingT<T, dnnl::pooling_forward,
mkldnn::pooling_backward> { dnnl::pooling_backward> {
public: public:
PoolingMKLDNNHandler(const paddle::framework::ExecutionContext& ctx, PoolingMKLDNNHandler(const paddle::framework::ExecutionContext& ctx,
const mkldnn::engine mkldnn_engine, const Tensor* input, const dnnl::engine mkldnn_engine, const Tensor* input,
Tensor* output) Tensor* output)
: platform::MKLDNNHandlerNoCachingT<T, mkldnn::pooling_forward, : platform::MKLDNNHandlerNoCachingT<T, dnnl::pooling_forward,
mkldnn::pooling_backward>( dnnl::pooling_backward>(
mkldnn_engine, ctx.GetPlace()) { mkldnn_engine, ctx.GetPlace()) {
PADDLE_ENFORCE_EQ(input->layout(), DataLayout::kMKLDNN, PADDLE_ENFORCE_EQ(input->layout(), DataLayout::kMKLDNN,
platform::errors::InvalidArgument( platform::errors::InvalidArgument(
...@@ -98,7 +98,7 @@ class PoolingMKLDNNHandler ...@@ -98,7 +98,7 @@ class PoolingMKLDNNHandler
const auto exclude_padding = ctx.Attr<bool>("exclusive"); const auto exclude_padding = ctx.Attr<bool>("exclusive");
const auto src_md = mkldnn::memory::desc(src_tz, dt, input->format()); const auto src_md = dnnl::memory::desc(src_tz, dt, input->format());
/* create memory descriptor for pooling without specified format /* create memory descriptor for pooling without specified format
* ('any') which lets a primitive (pooling in this case) choose * ('any') which lets a primitive (pooling in this case) choose
* the memory format preferred for best performance * the memory format preferred for best performance
...@@ -119,22 +119,21 @@ class PoolingMKLDNNHandler ...@@ -119,22 +119,21 @@ class PoolingMKLDNNHandler
ComputeAdaptivePoolParameters(ctx, src_tz, &ksize, &strides); ComputeAdaptivePoolParameters(ctx, src_tz, &ksize, &strides);
this->AcquireForwardPrimitiveDescriptor( this->AcquireForwardPrimitiveDescriptor(
is_test ? mkldnn::prop_kind::forward_inference is_test ? dnnl::prop_kind::forward_inference
: mkldnn::prop_kind::forward_training, : dnnl::prop_kind::forward_training,
pooling_type == "max" pooling_type == "max"
? mkldnn::algorithm::pooling_max ? dnnl::algorithm::pooling_max
: (exclude_padding : (exclude_padding ? dnnl::algorithm::pooling_avg_exclude_padding
? mkldnn::algorithm::pooling_avg_exclude_padding : dnnl::algorithm::pooling_avg_include_padding),
: mkldnn::algorithm::pooling_avg_include_padding),
src_md, dst_md, strides, ksize, mkldnn_paddings[0], mkldnn_paddings[1]); src_md, dst_md, strides, ksize, mkldnn_paddings[0], mkldnn_paddings[1]);
} }
PoolingMKLDNNHandler(const paddle::framework::ExecutionContext& ctx, PoolingMKLDNNHandler(const paddle::framework::ExecutionContext& ctx,
const mkldnn::engine mkldnn_engine, const Tensor* in_x, const dnnl::engine mkldnn_engine, const Tensor* in_x,
const Tensor* out_grad, Tensor* in_x_grad) const Tensor* out_grad, Tensor* in_x_grad)
: platform::MKLDNNHandlerNoCachingT<T, mkldnn::pooling_forward, : platform::MKLDNNHandlerNoCachingT<T, dnnl::pooling_forward,
mkldnn::pooling_backward>( dnnl::pooling_backward>(
mkldnn_engine, ctx.GetPlace()) { mkldnn_engine, ctx.GetPlace()) {
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
in_x->layout(), DataLayout::kMKLDNN, in_x->layout(), DataLayout::kMKLDNN,
...@@ -185,12 +184,11 @@ class PoolingMKLDNNHandler ...@@ -185,12 +184,11 @@ class PoolingMKLDNNHandler
auto diff_dst_tz = paddle::framework::vectorize<int64_t>(out_grad->dims()); auto diff_dst_tz = paddle::framework::vectorize<int64_t>(out_grad->dims());
const auto dt = framework::ToMKLDNNDataType(in_x->type()); const auto dt = framework::ToMKLDNNDataType(in_x->type());
auto src_md = mkldnn::memory::desc(src_tz, dt, in_x->format()); auto src_md = dnnl::memory::desc(src_tz, dt, in_x->format());
auto dst_md = auto dst_md = dnnl::memory::desc(diff_dst_tz, dt, MKLDNNMemoryFormat::any);
mkldnn::memory::desc(diff_dst_tz, dt, MKLDNNMemoryFormat::any); auto diff_dst_md = dnnl::memory::desc(
auto diff_dst_md = mkldnn::memory::desc(
diff_dst_tz, platform::MKLDNNGetDataType<T>(), out_grad->format()); diff_dst_tz, platform::MKLDNNGetDataType<T>(), out_grad->format());
auto diff_src_md = mkldnn::memory::desc( auto diff_src_md = dnnl::memory::desc(
diff_src_tz, platform::MKLDNNGetDataType<T>(), MKLDNNMemoryFormat::any); diff_src_tz, platform::MKLDNNGetDataType<T>(), MKLDNNMemoryFormat::any);
auto mkldnn_paddings = platform::ToMkldnnPadding(paddings); auto mkldnn_paddings = platform::ToMkldnnPadding(paddings);
...@@ -205,44 +203,42 @@ class PoolingMKLDNNHandler ...@@ -205,44 +203,42 @@ class PoolingMKLDNNHandler
const auto exclude_padding = ctx.Attr<bool>("exclusive"); const auto exclude_padding = ctx.Attr<bool>("exclusive");
this->AcquireForwardPrimitiveDescriptor( this->AcquireForwardPrimitiveDescriptor(
mkldnn::prop_kind::forward_training, dnnl::prop_kind::forward_training,
pooling_type == "max" pooling_type == "max"
? mkldnn::algorithm::pooling_max ? dnnl::algorithm::pooling_max
: (exclude_padding : (exclude_padding ? dnnl::algorithm::pooling_avg_exclude_padding
? mkldnn::algorithm::pooling_avg_exclude_padding : dnnl::algorithm::pooling_avg_include_padding),
: mkldnn::algorithm::pooling_avg_include_padding),
src_md, dst_md, strides, ksize, mkldnn_paddings[0], mkldnn_paddings[1]); src_md, dst_md, strides, ksize, mkldnn_paddings[0], mkldnn_paddings[1]);
this->AcquireBackwardPrimitiveDescriptor( this->AcquireBackwardPrimitiveDescriptor(
pooling_type == "max" pooling_type == "max"
? mkldnn::algorithm::pooling_max ? dnnl::algorithm::pooling_max
: (exclude_padding : (exclude_padding ? dnnl::algorithm::pooling_avg_exclude_padding
? mkldnn::algorithm::pooling_avg_exclude_padding : dnnl::algorithm::pooling_avg_include_padding),
: mkldnn::algorithm::pooling_avg_include_padding),
diff_src_md, diff_dst_md, strides, ksize, mkldnn_paddings[0], diff_src_md, diff_dst_md, strides, ksize, mkldnn_paddings[0],
mkldnn_paddings[1]); mkldnn_paddings[1]);
} }
std::shared_ptr<mkldnn::memory> AcquireWorkspaceMemory( std::shared_ptr<dnnl::memory> AcquireWorkspaceMemory(
const platform::MKLDNNDeviceContext& dev_ctx, const platform::MKLDNNDeviceContext& dev_ctx,
const std::string& unique_name) { const std::string& unique_name) {
mkldnn::memory::desc workspace_md = this->fwd_pd_->workspace_desc(); dnnl::memory::desc workspace_md = this->fwd_pd_->workspace_desc();
// Pooling Workspace has to be passed to Grad op that // Pooling Workspace has to be passed to Grad op that
// may be executed by a different thread, hence // may be executed by a different thread, hence
// for that one we use a key that does not contain TID // for that one we use a key that does not contain TID
std::string workspace_key = std::string workspace_key =
platform::CreateKey(dev_ctx, workspace_md.dims(), platform::CreateKey(dev_ctx, workspace_md.dims(),
workspace_md.data_type(), unique_name, "@wrk"); workspace_md.data_type(), unique_name, "@wrk");
auto mem_p = std::static_pointer_cast<mkldnn::memory>( auto mem_p =
dev_ctx.GetBlob(workspace_key)); std::static_pointer_cast<dnnl::memory>(dev_ctx.GetBlob(workspace_key));
if (mem_p == nullptr) { if (mem_p == nullptr) {
static std::mutex acquire_barrier; static std::mutex acquire_barrier;
std::lock_guard<std::mutex> block_threads_until_finish_this_job( std::lock_guard<std::mutex> block_threads_until_finish_this_job(
acquire_barrier); acquire_barrier);
mem_p = std::static_pointer_cast<mkldnn::memory>( mem_p = std::static_pointer_cast<dnnl::memory>(
dev_ctx.GetBlob(workspace_key)); dev_ctx.GetBlob(workspace_key));
if (mem_p == nullptr) { if (mem_p == nullptr) {
mem_p = std::make_shared<mkldnn::memory>(workspace_md, this->engine_); mem_p = std::make_shared<dnnl::memory>(workspace_md, this->engine_);
dev_ctx.SetBlob(workspace_key, mem_p); dev_ctx.SetBlob(workspace_key, mem_p);
} }
} }
...@@ -318,13 +314,13 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -318,13 +314,13 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
// Training // Training
auto workspace_memory = auto workspace_memory =
handler.AcquireWorkspaceMemory(dev_ctx, ctx.OutputName("Out")); handler.AcquireWorkspaceMemory(dev_ctx, ctx.OutputName("Out"));
pool_p->execute(astream, {{MKLDNN_ARG_SRC, *src_memory}, pool_p->execute(astream, {{DNNL_ARG_SRC, *src_memory},
{MKLDNN_ARG_DST, *dst_memory}, {DNNL_ARG_DST, *dst_memory},
{MKLDNN_ARG_WORKSPACE, *workspace_memory}}); {DNNL_ARG_WORKSPACE, *workspace_memory}});
} else { } else {
// Inference // Inference
pool_p->execute(astream, {{MKLDNN_ARG_SRC, *src_memory}, pool_p->execute(
{MKLDNN_ARG_DST, *dst_memory}}); astream, {{DNNL_ARG_SRC, *src_memory}, {DNNL_ARG_DST, *dst_memory}});
} }
astream.wait(); astream.wait();
...@@ -360,13 +356,13 @@ class PoolMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> { ...@@ -360,13 +356,13 @@ class PoolMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
// Max - pooling needs Workspace // Max - pooling needs Workspace
auto workspace_memory = auto workspace_memory =
handler.AcquireWorkspaceMemory(dev_ctx, ctx.InputName("Out")); handler.AcquireWorkspaceMemory(dev_ctx, ctx.InputName("Out"));
pool_bwd_p->execute(astream, {{MKLDNN_ARG_DIFF_SRC, *diff_src_memory}, pool_bwd_p->execute(astream, {{DNNL_ARG_DIFF_SRC, *diff_src_memory},
{MKLDNN_ARG_DIFF_DST, *diff_dst_memory}, {DNNL_ARG_DIFF_DST, *diff_dst_memory},
{MKLDNN_ARG_WORKSPACE, *workspace_memory}}); {DNNL_ARG_WORKSPACE, *workspace_memory}});
} else { } else {
// Average Pooling // Average Pooling
pool_bwd_p->execute(astream, {{MKLDNN_ARG_DIFF_SRC, *diff_src_memory}, pool_bwd_p->execute(astream, {{DNNL_ARG_DIFF_SRC, *diff_src_memory},
{MKLDNN_ARG_DIFF_DST, *diff_dst_memory}}); {DNNL_ARG_DIFF_DST, *diff_dst_memory}});
} }
astream.wait(); astream.wait();
......
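The pooling handler above leaves the destination descriptor as format_tag::any so the primitive can pick its preferred layout, and the concrete layout is then read back from the primitive descriptor. A small sketch of that "any" pattern with placeholder shapes, kernel and strides:

#include "dnnl.hpp"

// Illustrative only: destination left as format_tag::any, then the layout
// actually chosen by the implementation is queried from the primitive
// descriptor.
dnnl::memory::desc QueryPooledDstDesc(const dnnl::engine& eng) {
  using dt = dnnl::memory::data_type;
  using tag = dnnl::memory::format_tag;
  auto src_md = dnnl::memory::desc({1, 8, 32, 32}, dt::f32, tag::nchw);
  auto dst_md = dnnl::memory::desc({1, 8, 16, 16}, dt::f32, tag::any);

  auto desc = dnnl::pooling_forward::desc(
      dnnl::prop_kind::forward_inference, dnnl::algorithm::pooling_max, src_md,
      dst_md, /*strides=*/{2, 2}, /*kernel=*/{2, 2}, /*padding_l=*/{0, 0},
      /*padding_r=*/{0, 0});
  auto pd = dnnl::pooling_forward::primitive_desc(desc, eng);
  return pd.dst_desc();  // concrete memory format picked by oneDNN
}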
...@@ -37,8 +37,7 @@ class ScaleMKLDNNKernel : public framework::OpKernel<T> { ...@@ -37,8 +37,7 @@ class ScaleMKLDNNKernel : public framework::OpKernel<T> {
bool is_inplaced = x->IsSharedBufferWith(*out); bool is_inplaced = x->IsSharedBufferWith(*out);
platform::ActivationMKLDNNHandler<T> handler( platform::ActivationMKLDNNHandler<T> handler(
mkldnn::algorithm::eltwise_linear, ctx, mkldnn_engine, ctx.GetPlace(), dnnl::algorithm::eltwise_linear, ctx, mkldnn_engine, ctx.GetPlace(), x);
x);
auto src_memory_p = handler.AcquireSrcMemory(x); auto src_memory_p = handler.AcquireSrcMemory(x);
std::shared_ptr<dnnl::memory> dst_memory_p = nullptr; std::shared_ptr<dnnl::memory> dst_memory_p = nullptr;
...@@ -51,8 +50,8 @@ class ScaleMKLDNNKernel : public framework::OpKernel<T> { ...@@ -51,8 +50,8 @@ class ScaleMKLDNNKernel : public framework::OpKernel<T> {
auto activation_p = handler.AcquireForwardPrimitive(); auto activation_p = handler.AcquireForwardPrimitive();
auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream(); auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
activation_p->execute(astream, {{MKLDNN_ARG_FROM, *src_memory_p}, activation_p->execute(astream, {{DNNL_ARG_FROM, *src_memory_p},
{MKLDNN_ARG_TO, *dst_memory_p}}); {DNNL_ARG_TO, *dst_memory_p}});
astream.wait(); astream.wait();
out->set_layout(framework::DataLayout::kMKLDNN); out->set_layout(framework::DataLayout::kMKLDNN);
......
...@@ -32,15 +32,15 @@ using platform::to_void_cast; ...@@ -32,15 +32,15 @@ using platform::to_void_cast;
template <typename T> template <typename T>
class SoftmaxMKLDNNHandler class SoftmaxMKLDNNHandler
: public platform::MKLDNNHandlerNoCachingT<T, mkldnn::softmax_forward, : public platform::MKLDNNHandlerNoCachingT<T, dnnl::softmax_forward,
mkldnn::softmax_backward> { dnnl::softmax_backward> {
public: public:
SoftmaxMKLDNNHandler(const mkldnn::engine mkldnn_engine, SoftmaxMKLDNNHandler(const dnnl::engine mkldnn_engine,
platform::Place cpu_place, const Tensor* input, platform::Place cpu_place, const Tensor* input,
Tensor* output, const int axis) Tensor* output, const int axis)
: platform::MKLDNNHandlerNoCachingT<T, mkldnn::softmax_forward, : platform::MKLDNNHandlerNoCachingT<T, dnnl::softmax_forward,
mkldnn::softmax_backward>( dnnl::softmax_backward>(mkldnn_engine,
mkldnn_engine, cpu_place) { cpu_place) {
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
input->dims(), output->dims(), input->dims(), output->dims(),
platform::errors::InvalidArgument( platform::errors::InvalidArgument(
...@@ -55,13 +55,13 @@ class SoftmaxMKLDNNHandler ...@@ -55,13 +55,13 @@ class SoftmaxMKLDNNHandler
} }
SoftmaxMKLDNNHandler(const framework::ExecutionContext& ctx, SoftmaxMKLDNNHandler(const framework::ExecutionContext& ctx,
const mkldnn::engine mkldnn_engine, const dnnl::engine mkldnn_engine,
platform::Place cpu_place, const Tensor* out, platform::Place cpu_place, const Tensor* out,
const Tensor* out_grad, Tensor* in_x_grad, const Tensor* out_grad, Tensor* in_x_grad,
const std::string& unique_name) const std::string& unique_name)
: platform::MKLDNNHandlerNoCachingT<T, mkldnn::softmax_forward, : platform::MKLDNNHandlerNoCachingT<T, dnnl::softmax_forward,
mkldnn::softmax_backward>( dnnl::softmax_backward>(mkldnn_engine,
mkldnn_engine, cpu_place) { cpu_place) {
PADDLE_ENFORCE_EQ(out_grad->dims(), in_x_grad->dims(), PADDLE_ENFORCE_EQ(out_grad->dims(), in_x_grad->dims(),
platform::errors::InvalidArgument( platform::errors::InvalidArgument(
"The shape of softmax_grad's input " "The shape of softmax_grad's input "
...@@ -154,10 +154,9 @@ class SoftmaxMKLDNNGradKernel : public paddle::framework::OpKernel<T> { ...@@ -154,10 +154,9 @@ class SoftmaxMKLDNNGradKernel : public paddle::framework::OpKernel<T> {
auto softmax_bwd_p = handler.AcquireBackwardPrimitive(); auto softmax_bwd_p = handler.AcquireBackwardPrimitive();
auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
softmax_bwd_p->execute(astream, softmax_bwd_p->execute(astream, {{DNNL_ARG_DST, *dst_memory_p},
{{MKLDNN_ARG_DST, *dst_memory_p}, {DNNL_ARG_DIFF_DST, *diff_dst_memory_p},
{MKLDNN_ARG_DIFF_DST, *diff_dst_memory_p}, {DNNL_ARG_DIFF_SRC, *diff_src_memory_p}});
{MKLDNN_ARG_DIFF_SRC, *diff_src_memory_p}});
astream.wait(); astream.wait();
in_x_grad->set_layout(framework::DataLayout::kMKLDNN); in_x_grad->set_layout(framework::DataLayout::kMKLDNN);
......
...@@ -20,10 +20,10 @@ namespace operators { ...@@ -20,10 +20,10 @@ namespace operators {
using framework::DataLayout; using framework::DataLayout;
using framework::Tensor; using framework::Tensor;
using framework::LoDTensor; using framework::LoDTensor;
using mkldnn::memory; using dnnl::memory;
using mkldnn::primitive; using dnnl::primitive;
using mkldnn::concat; using dnnl::concat;
using mkldnn::stream; using dnnl::stream;
using platform::to_void_cast; using platform::to_void_cast;
template <typename T> template <typename T>
...@@ -31,7 +31,7 @@ class StackMKLDNNHandler ...@@ -31,7 +31,7 @@ class StackMKLDNNHandler
: public platform::MKLDNNHandlerNoCachingT<T, dnnl::concat> { : public platform::MKLDNNHandlerNoCachingT<T, dnnl::concat> {
public: public:
StackMKLDNNHandler(const framework::ExecutionContext& ctx, StackMKLDNNHandler(const framework::ExecutionContext& ctx,
const mkldnn::engine mkldnn_engine, const dnnl::engine mkldnn_engine,
const std::vector<const Tensor*>& inputs, Tensor* output) const std::vector<const Tensor*>& inputs, Tensor* output)
: platform::MKLDNNHandlerNoCachingT<T, dnnl::concat>(mkldnn_engine, : platform::MKLDNNHandlerNoCachingT<T, dnnl::concat>(mkldnn_engine,
ctx.GetPlace()) { ctx.GetPlace()) {
...@@ -91,7 +91,7 @@ class StackMKLDNNHandler ...@@ -91,7 +91,7 @@ class StackMKLDNNHandler
dst_md, stack_axis, srcs_md, this->engine_)); dst_md, stack_axis, srcs_md, this->engine_));
} }
std::shared_ptr<mkldnn::memory> AcquireSrcMemory(const Tensor& input, int i) { std::shared_ptr<dnnl::memory> AcquireSrcMemory(const Tensor& input, int i) {
const T* input_data = input.data<T>(); const T* input_data = input.data<T>();
return this->AcquireMemoryFromPrimitive(this->fwd_pd_->src_desc(i), return this->AcquireMemoryFromPrimitive(this->fwd_pd_->src_desc(i),
to_void_cast<T>(input_data)); to_void_cast<T>(input_data));
...@@ -122,9 +122,9 @@ class StackMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -122,9 +122,9 @@ class StackMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
std::unordered_map<int, memory> args; std::unordered_map<int, memory> args;
for (size_t i = 0; i < multi_input.size(); ++i) { for (size_t i = 0; i < multi_input.size(); ++i) {
srcs.push_back(handler.AcquireSrcMemory(*(multi_input[i]), i)); srcs.push_back(handler.AcquireSrcMemory(*(multi_input[i]), i));
args.insert({MKLDNN_ARG_MULTIPLE_SRC + i, *(srcs.at(i))}); args.insert({DNNL_ARG_MULTIPLE_SRC + i, *(srcs.at(i))});
} }
args.insert({MKLDNN_ARG_DST, *dst_mem}); args.insert({DNNL_ARG_DST, *dst_mem});
concat_p->execute(astream, args); concat_p->execute(astream, args);
astream.wait(); astream.wait();
......
...@@ -48,7 +48,7 @@ template <typename T> ...@@ -48,7 +48,7 @@ template <typename T>
class SumMKLDNNHandler class SumMKLDNNHandler
: public platform::MKLDNNHandlerNoCachingT<T, dnnl::sum> { : public platform::MKLDNNHandlerNoCachingT<T, dnnl::sum> {
public: public:
SumMKLDNNHandler(mkldnn::engine engine, platform::Place cpu_place, SumMKLDNNHandler(dnnl::engine engine, platform::Place cpu_place,
const std::vector<framework::Variable*>& in_vars, const std::vector<framework::Variable*>& in_vars,
framework::LoDTensor* z) framework::LoDTensor* z)
...@@ -57,20 +57,20 @@ class SumMKLDNNHandler ...@@ -57,20 +57,20 @@ class SumMKLDNNHandler
auto dst_tz = framework::vectorize<int64_t>(z->dims()); auto dst_tz = framework::vectorize<int64_t>(z->dims());
auto src_tz = dst_tz; auto src_tz = dst_tz;
std::vector<mkldnn::memory::desc> srcs_md; std::vector<dnnl::memory::desc> srcs_md;
for (size_t i = 0; i < in_vars.size(); i++) { for (size_t i = 0; i < in_vars.size(); i++) {
auto& input_it = in_vars[i]->Get<framework::LoDTensor>(); auto& input_it = in_vars[i]->Get<framework::LoDTensor>();
if (input_it.numel() == 0) { if (input_it.numel() == 0) {
continue; continue;
} }
MKLDNNMemoryFormat input_format = input_it.format(); MKLDNNMemoryFormat input_format = input_it.format();
srcs_md.push_back(mkldnn::memory::desc( srcs_md.push_back(dnnl::memory::desc(
src_tz, platform::MKLDNNGetDataType<T>(), input_format)); src_tz, platform::MKLDNNGetDataType<T>(), input_format));
++num_inputs_; ++num_inputs_;
} }
std::vector<float> scales(num_inputs_, 1.0); std::vector<float> scales(num_inputs_, 1.0);
auto dst_md = mkldnn::memory::desc(dst_tz, platform::MKLDNNGetDataType<T>(), auto dst_md = dnnl::memory::desc(dst_tz, platform::MKLDNNGetDataType<T>(),
MKLDNNMemoryFormat::any); MKLDNNMemoryFormat::any);
this->AcquireForwardPrimitiveDescriptor(dst_md, scales, srcs_md); this->AcquireForwardPrimitiveDescriptor(dst_md, scales, srcs_md);
...@@ -79,14 +79,14 @@ class SumMKLDNNHandler ...@@ -79,14 +79,14 @@ class SumMKLDNNHandler
// (jczaja) sum oneDNN prim is not having .desc attribute so // (jczaja) sum oneDNN prim is not having .desc attribute so
// we cannot use base AcquireForwardPrimitiveDescriptor // we cannot use base AcquireForwardPrimitiveDescriptor
void AcquireForwardPrimitiveDescriptor( void AcquireForwardPrimitiveDescriptor(
const mkldnn::memory::desc& dst_md, const std::vector<float>& scales, const dnnl::memory::desc& dst_md, const std::vector<float>& scales,
const std::vector<mkldnn::memory::desc>& srcs_md) { const std::vector<dnnl::memory::desc>& srcs_md) {
this->fwd_pd_.reset( this->fwd_pd_.reset(
new dnnl::sum::primitive_desc(dst_md, scales, srcs_md, this->engine_)); new dnnl::sum::primitive_desc(dst_md, scales, srcs_md, this->engine_));
} }
std::shared_ptr<mkldnn::memory> AcquireSrcMemory( std::shared_ptr<dnnl::memory> AcquireSrcMemory(const framework::Tensor& input,
const framework::Tensor& input, int i) { int i) {
const T* input_data = input.data<T>(); const T* input_data = input.data<T>();
return this->AcquireMemoryFromPrimitive(this->fwd_pd_->src_desc(i), return this->AcquireMemoryFromPrimitive(this->fwd_pd_->src_desc(i),
to_void_cast<T>(input_data)); to_void_cast<T>(input_data));
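As the comment in the hunk above notes, dnnl::sum has no operation descriptor, so the handler constructs dnnl::sum::primitive_desc directly from the destination desc, the scales and the source descs, and binds each source as DNNL_ARG_MULTIPLE_SRC + i. A standalone sketch under the same assumptions (not Paddle code; shapes and scales are illustrative):

#include <unordered_map>
#include <vector>
#include "dnnl.hpp"

int main() {
  dnnl::engine engine(dnnl::engine::kind::cpu, 0);
  dnnl::stream stream(engine);

  dnnl::memory::desc md({2, 3}, dnnl::memory::data_type::f32,
                        dnnl::memory::format_tag::ab);
  std::vector<float> a(6, 1.f), b(6, 2.f), out(6, 0.f);
  dnnl::memory src0(md, engine, a.data());
  dnnl::memory src1(md, engine, b.data());
  dnnl::memory dst(md, engine, out.data());

  // No dnnl::sum::desc exists; the primitive_desc is built directly.
  std::vector<float> scales = {1.f, 1.f};
  std::vector<dnnl::memory::desc> srcs_md = {md, md};
  dnnl::sum::primitive_desc sum_pd(md, scales, srcs_md, engine);

  std::unordered_map<int, dnnl::memory> args = {
      {DNNL_ARG_MULTIPLE_SRC + 0, src0},
      {DNNL_ARG_MULTIPLE_SRC + 1, src1},
      {DNNL_ARG_DST, dst}};
  dnnl::sum(sum_pd).execute(stream, args);
  stream.wait();  // out now holds the element-wise weighted sum of a and b
  return 0;
}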
...@@ -94,7 +94,7 @@ class SumMKLDNNHandler ...@@ -94,7 +94,7 @@ class SumMKLDNNHandler
using platform::MKLDNNHandlerNoCachingT<T, dnnl::sum>::AcquireDstMemory; using platform::MKLDNNHandlerNoCachingT<T, dnnl::sum>::AcquireDstMemory;
std::shared_ptr<mkldnn::memory> AcquireDstMemory(void) { std::shared_ptr<dnnl::memory> AcquireDstMemory(void) {
return this->AcquireMemoryFromPrimitive(this->fwd_pd_->dst_desc()); return this->AcquireMemoryFromPrimitive(this->fwd_pd_->dst_desc());
} }
...@@ -125,7 +125,7 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -125,7 +125,7 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
SumMKLDNNHandler<T> handler(mkldnn_engine, ctx.GetPlace(), in_vars, output); SumMKLDNNHandler<T> handler(mkldnn_engine, ctx.GetPlace(), in_vars, output);
// Create list of SRC MEMs // Create list of SRC MEMs
std::vector<std::shared_ptr<mkldnn::memory>> srcs_mem; std::vector<std::shared_ptr<dnnl::memory>> srcs_mem;
srcs_mem.reserve(handler.GetNumInputs()); srcs_mem.reserve(handler.GetNumInputs());
int input_index = 0; int input_index = 0;
for (size_t i = 0; i < in_vars.size(); i++) { for (size_t i = 0; i < in_vars.size(); i++) {
...@@ -147,11 +147,11 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -147,11 +147,11 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
auto sum_p = handler.AcquireForwardPrimitive(); auto sum_p = handler.AcquireForwardPrimitive();
std::unordered_map<int, mkldnn::memory> args; std::unordered_map<int, dnnl::memory> args;
for (size_t i = 0; i < srcs_mem.size(); ++i) { for (size_t i = 0; i < srcs_mem.size(); ++i) {
args.insert({MKLDNN_ARG_MULTIPLE_SRC + i, *(srcs_mem[i])}); args.insert({DNNL_ARG_MULTIPLE_SRC + i, *(srcs_mem[i])});
} }
args.insert({MKLDNN_ARG_DST, *dst_mem}); args.insert({DNNL_ARG_DST, *dst_mem});
auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
sum_p->execute(astream, args); sum_p->execute(astream, args);
......
...@@ -589,7 +589,7 @@ MKLDNNDeviceContext::MKLDNNDeviceContext(CPUPlace place) ...@@ -589,7 +589,7 @@ MKLDNNDeviceContext::MKLDNNDeviceContext(CPUPlace place)
} }
MKLDNNDeviceContextThreadLocals::Body::Body() MKLDNNDeviceContextThreadLocals::Body::Body()
: cur_engine(mkldnn::engine::kind::cpu, 0), cur_stream(cur_engine) { : cur_engine(dnnl::engine::kind::cpu, 0), cur_stream(cur_engine) {
cur_mkldnn_session_id = kMKLDNNSessionID_Default; cur_mkldnn_session_id = kMKLDNNSessionID_Default;
cur_input_shape_str = ""; cur_input_shape_str = "";
cur_input_shape_cache_capacity = 1; cur_input_shape_cache_capacity = 1;
...@@ -647,11 +647,11 @@ void MKLDNNDeviceContextThreadLocals::Body::log_lib_version(void) { ...@@ -647,11 +647,11 @@ void MKLDNNDeviceContextThreadLocals::Body::log_lib_version(void) {
} }
} }
const mkldnn::engine& MKLDNNDeviceContextThreadLocals::Body::get_engine(void) { const dnnl::engine& MKLDNNDeviceContextThreadLocals::Body::get_engine(void) {
return cur_engine; return cur_engine;
} }
mkldnn::stream& MKLDNNDeviceContextThreadLocals::Body::get_stream(void) { dnnl::stream& MKLDNNDeviceContextThreadLocals::Body::get_stream(void) {
return cur_stream; return cur_stream;
} }
......
...@@ -46,8 +46,9 @@ limitations under the License. */ ...@@ -46,8 +46,9 @@ limitations under the License. */
#endif #endif
#ifdef PADDLE_WITH_MKLDNN #ifdef PADDLE_WITH_MKLDNN
#include "mkldnn.hpp" #include "dnnl.hpp"
#include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/framework/data_layout.h"
namespace mkldnn = dnnl;
#endif #endif
#include <map> #include <map>
...@@ -63,6 +64,10 @@ limitations under the License. */ ...@@ -63,6 +64,10 @@ limitations under the License. */
#endif #endif
#include "unsupported/Eigen/CXX11/Tensor" #include "unsupported/Eigen/CXX11/Tensor"
// This alias is required for now so that namespace name changes can be made to
// less than 20 files at a time. After all the names are changed it will be
// removed.
namespace Eigen { namespace Eigen {
struct DefaultDevice; struct DefaultDevice;
struct GpuDevice; struct GpuDevice;
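The temporary alias this comment refers to (namespace mkldnn = dnnl, added a few lines above) keeps the remaining mkldnn:: spellings compiling against dnnl.hpp while files are migrated in batches. A minimal sketch of what the alias buys (not Paddle code):

#include <type_traits>
#include "dnnl.hpp"

namespace mkldnn = dnnl;  // temporary bridge; removed once the rename is complete

int main() {
  // During the transition the old and new spellings name the same types.
  static_assert(std::is_same<mkldnn::memory, dnnl::memory>::value,
                "the alias makes the old namespace an exact synonym");
  mkldnn::engine legacy_engine(mkldnn::engine::kind::cpu, 0);
  dnnl::engine& same_engine = legacy_engine;  // binds without any conversion
  (void)same_engine;
  return 0;
}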
...@@ -706,8 +711,8 @@ class MKLDNNDeviceContextThreadLocals { ...@@ -706,8 +711,8 @@ class MKLDNNDeviceContextThreadLocals {
// know for converting MKL-DNN Tensor to non MKL-DNN // know for converting MKL-DNN Tensor to non MKL-DNN
paddle::framework::DataLayout cur_paddle_data_layout; paddle::framework::DataLayout cur_paddle_data_layout;
// MKL-DNN stream used for execution of primitives (per-thread) // MKL-DNN stream used for execution of primitives (per-thread)
mkldnn::engine cur_engine; dnnl::engine cur_engine;
mkldnn::stream cur_stream; dnnl::stream cur_stream;
std::string key_suffix; // Key identifying current Executor std::string key_suffix; // Key identifying current Executor
bool key_attach_thread_id = true; bool key_attach_thread_id = true;
void* exec_ptr_ = nullptr; void* exec_ptr_ = nullptr;
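The per-thread engine and stream kept in this Body mean every kernel executed on a given thread reuses one CPU engine and one stream bound to it. A standalone sketch of that design (hypothetical names, not Paddle code):

#include "dnnl.hpp"

struct ThreadLocalOneDNN {
  dnnl::engine engine{dnnl::engine::kind::cpu, 0};
  dnnl::stream stream{engine};
};

// One lazily constructed engine/stream pair per thread, shared by all
// primitives that thread executes.
ThreadLocalOneDNN& tls_onednn() {
  static thread_local ThreadLocalOneDNN body;
  return body;
}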
...@@ -721,8 +726,8 @@ class MKLDNNDeviceContextThreadLocals { ...@@ -721,8 +726,8 @@ class MKLDNNDeviceContextThreadLocals {
void set_cur_paddle_data_layout(framework::DataLayout dl); void set_cur_paddle_data_layout(framework::DataLayout dl);
framework::DataLayout get_cur_paddle_data_layout(void); framework::DataLayout get_cur_paddle_data_layout(void);
void log_lib_version(void); void log_lib_version(void);
const mkldnn::engine& get_engine(void); const dnnl::engine& get_engine(void);
mkldnn::stream& get_stream(void); dnnl::stream& get_stream(void);
void set_key_suffix(const std::string& suffix) { key_suffix = suffix; } void set_key_suffix(const std::string& suffix) { key_suffix = suffix; }
const std::string& get_key_suffix(void) const { return key_suffix; } const std::string& get_key_suffix(void) const { return key_suffix; }
void disable_tid_in_key(void) { key_attach_thread_id = false; } void disable_tid_in_key(void) { key_attach_thread_id = false; }
...@@ -776,7 +781,7 @@ class MKLDNNDeviceContext : public CPUDeviceContext { ...@@ -776,7 +781,7 @@ class MKLDNNDeviceContext : public CPUDeviceContext {
explicit MKLDNNDeviceContext(CPUPlace place); explicit MKLDNNDeviceContext(CPUPlace place);
/* \brief Get the active engine */ /* \brief Get the active engine */
const mkldnn::engine& GetEngine() const { return tls().get_engine(); } const dnnl::engine& GetEngine() const { return tls().get_engine(); }
// Register object to currently used executor's map // Register object to currently used executor's map
void LinkEntryWithExecutor(BlobPtr_t<KeyBlob>, KeyBlob::iterator) const; void LinkEntryWithExecutor(BlobPtr_t<KeyBlob>, KeyBlob::iterator) const;
......
...@@ -334,34 +334,34 @@ inline dnnl::memory::format_tag GetMKLDNNFormat(const dnnl::memory memory) { ...@@ -334,34 +334,34 @@ inline dnnl::memory::format_tag GetMKLDNNFormat(const dnnl::memory memory) {
return GetMKLDNNFormat(mem_desc); return GetMKLDNNFormat(mem_desc);
} }
inline mkldnn::memory::format_tag GetPlainMKLDNNFormat(int tensor_rank) { inline dnnl::memory::format_tag GetPlainMKLDNNFormat(int tensor_rank) {
switch (tensor_rank) { switch (tensor_rank) {
case 1: case 1:
return mkldnn::memory::format_tag::a; return dnnl::memory::format_tag::a;
break; break;
case 2: case 2:
return mkldnn::memory::format_tag::ab; return dnnl::memory::format_tag::ab;
break; break;
case 3: case 3:
return mkldnn::memory::format_tag::abc; return dnnl::memory::format_tag::abc;
break; break;
case 4: case 4:
return mkldnn::memory::format_tag::abcd; return dnnl::memory::format_tag::abcd;
break; break;
case 5: case 5:
return mkldnn::memory::format_tag::abcde; return dnnl::memory::format_tag::abcde;
break; break;
case 6: case 6:
return mkldnn::memory::format_tag::abcdef; return dnnl::memory::format_tag::abcdef;
break; break;
case 7: case 7:
return mkldnn::memory::format_tag::abcdefg; return dnnl::memory::format_tag::abcdefg;
break; break;
case 8: case 8:
return mkldnn::memory::format_tag::abcdefgh; return dnnl::memory::format_tag::abcdefgh;
break; break;
case 9: case 9:
return mkldnn::memory::format_tag::abcdefghi; return dnnl::memory::format_tag::abcdefghi;
break; break;
default: default:
PADDLE_THROW(platform::errors::Unimplemented( PADDLE_THROW(platform::errors::Unimplemented(
......