未验证 提交 7db7a0ec 编写于 作者: P piotrekobiIntel 提交者: GitHub

Changed second batch of deprecated mkldnn header and function names to new oneDNN names (#37351)

* Add second batch of deprecated mkldnn namespace and macro changes

* Unlock CI

* Fix temporary namespace alias placing
上级 10d8d6b6
......@@ -90,9 +90,9 @@ class Tensor {
#ifdef PADDLE_WITH_MKLDNN
public:
// Returns the memory format tag currently stored in format_.
// NOTE(review): this listing shows the declaration twice: first with the
// mkldnn:: spelling, then with the dnnl:: spelling the commit switches to.
inline mkldnn::memory::format_tag format() const { return format_; }
inline dnnl::memory::format_tag format() const { return format_; }
// Overwrites format_ with the given memory format tag.
// NOTE(review): the signature line appears twice in this listing: the
// mkldnn:: spelling followed by its dnnl:: replacement.
inline void set_format(const mkldnn::memory::format_tag format) {
inline void set_format(const dnnl::memory::format_tag format) {
format_ = format;
}
......@@ -106,7 +106,7 @@ class Tensor {
* this field.
*/
mkldnn::memory::format_tag format_ = mkldnn::memory::format_tag::undef;
dnnl::memory::format_tag format_ = dnnl::memory::format_tag::undef;
#endif
public:
......
......@@ -30,9 +30,9 @@ namespace operators {
using framework::DataLayout;
using framework::Tensor;
using mkldnn::memory;
using mkldnn::primitive;
using mkldnn::stream;
using dnnl::memory;
using dnnl::primitive;
using dnnl::stream;
using platform::GetMKLDNNFormat;
using platform::MKLDNNDeviceContext;
using platform::to_void_cast;
......@@ -75,7 +75,7 @@ class MKLDNNActivationGradKernel
template <typename T>
void eltwise_forward(const framework::ExecutionContext &ctx,
mkldnn::algorithm algorithm) {
dnnl::algorithm algorithm) {
PADDLE_ENFORCE_EQ(platform::is_cpu_place(ctx.GetPlace()), true,
paddle::platform::errors::PreconditionNotMet(
"Operator DNNL eletwise_forward must use CPUPlace"));
......@@ -101,8 +101,8 @@ void eltwise_forward(const framework::ExecutionContext &ctx,
auto activation_p = handler.AcquireForwardPrimitive();
auto &astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
activation_p->execute(astream, {{MKLDNN_ARG_FROM, *src_memory_p},
{MKLDNN_ARG_TO, *dst_memory_p}});
activation_p->execute(
astream, {{DNNL_ARG_FROM, *src_memory_p}, {DNNL_ARG_TO, *dst_memory_p}});
astream.wait();
y->set_layout(DataLayout::kMKLDNN);
......@@ -111,7 +111,7 @@ void eltwise_forward(const framework::ExecutionContext &ctx,
template <typename T>
void eltwise_grad(const framework::ExecutionContext &ctx,
mkldnn::algorithm algorithm) {
dnnl::algorithm algorithm) {
auto &dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
const auto &mkldnn_engine = dev_ctx.GetEngine();
......@@ -129,23 +129,23 @@ void eltwise_grad(const framework::ExecutionContext &ctx,
auto &astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
activation_backward_p->execute(astream,
{{MKLDNN_ARG_SRC, *src_memory_p},
{MKLDNN_ARG_DIFF_DST, *diff_dst_memory_p},
{MKLDNN_ARG_DIFF_SRC, *diff_src_memory_p}});
{{DNNL_ARG_SRC, *src_memory_p},
{DNNL_ARG_DIFF_DST, *diff_dst_memory_p},
{DNNL_ARG_DIFF_SRC, *diff_src_memory_p}});
astream.wait();
diff_x->set_layout(DataLayout::kMKLDNN);
diff_x->set_format(GetMKLDNNFormat(*diff_src_memory_p));
}
// Forward activation functor whose algorithm is fixed at compile time by the
// `algorithm` template argument. Invoking it forwards `ctx` and that
// algorithm to eltwise_forward<T>.
// NOTE(review): the template header is listed twice (mkldnn:: spelling, then
// the dnnl:: spelling introduced by the commit).
template <typename T, mkldnn::algorithm algorithm>
template <typename T, dnnl::algorithm algorithm>
struct MKLDNNActivationFunc : public BaseActivationFunctor<T> {
void operator()(const framework::ExecutionContext &ctx) const {
eltwise_forward<T>(ctx, algorithm);
}
};
template <typename T, mkldnn::algorithm algorithm>
template <typename T, dnnl::algorithm algorithm>
struct MKLDNNActivationGradFunc : public BaseActivationFunctor<T> {
void operator()(const framework::ExecutionContext &ctx) const {
eltwise_grad<T>(ctx, algorithm);
......@@ -157,9 +157,9 @@ struct GeluMKLDNNFunctor : public BaseActivationFunctor<T> {
// Runs the forward GELU kernel. The boolean "approximate" attribute selects
// the algorithm: eltwise_gelu_tanh when true, eltwise_gelu_erf otherwise.
// Each eltwise_forward call is listed twice below (mkldnn:: spelling followed
// by the dnnl:: spelling); only the namespace differs.
void operator()(const framework::ExecutionContext &ctx) const {
const bool approximate = ctx.Attr<bool>("approximate");
if (approximate) {
eltwise_forward<T>(ctx, mkldnn::algorithm::eltwise_gelu_tanh);
eltwise_forward<T>(ctx, dnnl::algorithm::eltwise_gelu_tanh);
} else {
eltwise_forward<T>(ctx, mkldnn::algorithm::eltwise_gelu_erf);
eltwise_forward<T>(ctx, dnnl::algorithm::eltwise_gelu_erf);
}
}
};
......@@ -169,9 +169,9 @@ struct GeluMKLDNNGradFunctor : public BaseActivationFunctor<T> {
// Runs the backward GELU kernel. The boolean "approximate" attribute selects
// the algorithm: eltwise_gelu_tanh when true, eltwise_gelu_erf otherwise.
// Each eltwise_grad call is listed twice below (mkldnn:: spelling followed by
// the dnnl:: spelling); only the namespace differs.
void operator()(const framework::ExecutionContext &ctx) const {
const bool approximate = ctx.Attr<bool>("approximate");
if (approximate) {
eltwise_grad<T>(ctx, mkldnn::algorithm::eltwise_gelu_tanh);
eltwise_grad<T>(ctx, dnnl::algorithm::eltwise_gelu_tanh);
} else {
eltwise_grad<T>(ctx, mkldnn::algorithm::eltwise_gelu_erf);
eltwise_grad<T>(ctx, dnnl::algorithm::eltwise_gelu_erf);
}
}
};
......@@ -185,75 +185,73 @@ struct SoftplusMKLDNNFunctor : public BaseActivationFunctor<T> {
// Forward functor aliases: each name binds MKLDNNActivationFunc to one fixed
// eltwise algorithm (relu, bounded_relu, swish, hardswish, logistic, tanh,
// sqrt, abs, elu).
// NOTE(review): every alias target is listed twice, first with the mkldnn::
// spelling and then with the dnnl:: spelling. For Abs and Elu the dnnl::
// version is written on a single line, so the whole `using` line is repeated.
template <typename T>
using ReluMKLDNNFunctor =
MKLDNNActivationFunc<T, mkldnn::algorithm::eltwise_relu>;
MKLDNNActivationFunc<T, dnnl::algorithm::eltwise_relu>;
template <typename T>
using Relu6MKLDNNFunctor =
MKLDNNActivationFunc<T, mkldnn::algorithm::eltwise_bounded_relu>;
MKLDNNActivationFunc<T, dnnl::algorithm::eltwise_bounded_relu>;
template <typename T>
using SwishMKLDNNFunctor =
MKLDNNActivationFunc<T, mkldnn::algorithm::eltwise_swish>;
MKLDNNActivationFunc<T, dnnl::algorithm::eltwise_swish>;
template <typename T>
using HardSwishMKLDNNFunctor =
MKLDNNActivationFunc<T, mkldnn::algorithm::eltwise_hardswish>;
MKLDNNActivationFunc<T, dnnl::algorithm::eltwise_hardswish>;
template <typename T>
using SigmoidMKLDNNFunctor =
MKLDNNActivationFunc<T, mkldnn::algorithm::eltwise_logistic>;
MKLDNNActivationFunc<T, dnnl::algorithm::eltwise_logistic>;
template <typename T>
using TanhMKLDNNFunctor =
MKLDNNActivationFunc<T, mkldnn::algorithm::eltwise_tanh>;
MKLDNNActivationFunc<T, dnnl::algorithm::eltwise_tanh>;
template <typename T>
using SqrtMKLDNNFunctor =
MKLDNNActivationFunc<T, mkldnn::algorithm::eltwise_sqrt>;
MKLDNNActivationFunc<T, dnnl::algorithm::eltwise_sqrt>;
template <typename T>
using AbsMKLDNNFunctor =
MKLDNNActivationFunc<T, mkldnn::algorithm::eltwise_abs>;
using AbsMKLDNNFunctor = MKLDNNActivationFunc<T, dnnl::algorithm::eltwise_abs>;
template <typename T>
using EluMKLDNNFunctor =
MKLDNNActivationFunc<T, mkldnn::algorithm::eltwise_elu>;
using EluMKLDNNFunctor = MKLDNNActivationFunc<T, dnnl::algorithm::eltwise_elu>;
// Gradient functor aliases: each name binds MKLDNNActivationGradFunc to one
// fixed eltwise algorithm (relu, bounded_relu, swish, hardswish, logistic,
// tanh, sqrt, abs, elu).
// NOTE(review): every alias target is listed twice, first with the mkldnn::
// spelling and then with the dnnl:: spelling the commit introduces.
template <typename T>
using ReluMKLDNNGradFunctor =
MKLDNNActivationGradFunc<T, mkldnn::algorithm::eltwise_relu>;
MKLDNNActivationGradFunc<T, dnnl::algorithm::eltwise_relu>;
template <typename T>
using Relu6MKLDNNGradFunctor =
MKLDNNActivationGradFunc<T, mkldnn::algorithm::eltwise_bounded_relu>;
MKLDNNActivationGradFunc<T, dnnl::algorithm::eltwise_bounded_relu>;
template <typename T>
using SwishMKLDNNGradFunctor =
MKLDNNActivationGradFunc<T, mkldnn::algorithm::eltwise_swish>;
MKLDNNActivationGradFunc<T, dnnl::algorithm::eltwise_swish>;
template <typename T>
using HardSwishMKLDNNGradFunctor =
MKLDNNActivationGradFunc<T, mkldnn::algorithm::eltwise_hardswish>;
MKLDNNActivationGradFunc<T, dnnl::algorithm::eltwise_hardswish>;
template <typename T>
using SigmoidMKLDNNGradFunctor =
MKLDNNActivationGradFunc<T, mkldnn::algorithm::eltwise_logistic>;
MKLDNNActivationGradFunc<T, dnnl::algorithm::eltwise_logistic>;
template <typename T>
using TanhMKLDNNGradFunctor =
MKLDNNActivationGradFunc<T, mkldnn::algorithm::eltwise_tanh>;
MKLDNNActivationGradFunc<T, dnnl::algorithm::eltwise_tanh>;
template <typename T>
using SqrtMKLDNNGradFunctor =
MKLDNNActivationGradFunc<T, mkldnn::algorithm::eltwise_sqrt>;
MKLDNNActivationGradFunc<T, dnnl::algorithm::eltwise_sqrt>;
template <typename T>
using AbsMKLDNNGradFunctor =
MKLDNNActivationGradFunc<T, mkldnn::algorithm::eltwise_abs>;
MKLDNNActivationGradFunc<T, dnnl::algorithm::eltwise_abs>;
template <typename T>
using EluMKLDNNGradFunctor =
MKLDNNActivationGradFunc<T, mkldnn::algorithm::eltwise_elu>;
MKLDNNActivationGradFunc<T, dnnl::algorithm::eltwise_elu>;
} // namespace operators
} // namespace paddle
......
......@@ -27,24 +27,23 @@ class MKLDNNDeviceContext;
namespace paddle {
namespace operators {
using mkldnn::memory;
using mkldnn::primitive;
using mkldnn::reorder;
using mkldnn::stream;
using dnnl::memory;
using dnnl::primitive;
using dnnl::reorder;
using dnnl::stream;
using paddle::platform::MKLDNNDeviceContext;
using platform::to_void_cast;
template <typename T>
class BatchNormMKLDNNHandler : public platform::MKLDNNHandlerNoCachingT<
T, mkldnn::batch_normalization_forward,
mkldnn::batch_normalization_backward> {
T, dnnl::batch_normalization_forward,
dnnl::batch_normalization_backward> {
public:
BatchNormMKLDNNHandler(const paddle::framework::ExecutionContext &ctx,
const mkldnn::engine mkldnn_engine, const Tensor *x,
const dnnl::engine mkldnn_engine, const Tensor *x,
const bool global_stats, const bool test_mode)
: platform::MKLDNNHandlerNoCachingT<T,
mkldnn::batch_normalization_forward,
mkldnn::batch_normalization_backward>(
: platform::MKLDNNHandlerNoCachingT<T, dnnl::batch_normalization_forward,
dnnl::batch_normalization_backward>(
mkldnn_engine, ctx.GetPlace()) {
const float epsilon = ctx.Attr<float>("epsilon");
const bool fuse_with_relu = ctx.HasAttr("fuse_with_relu")
......@@ -66,28 +65,27 @@ class BatchNormMKLDNNHandler : public platform::MKLDNNHandlerNoCachingT<
auto src_tz = paddle::framework::vectorize(x->dims());
// Flags are added by bitwise OR operation
auto flags = mkldnn::normalization_flags::use_scale_shift; // 001
auto flags = dnnl::normalization_flags::use_scale_shift; // 001
if (global_stats)
flags |= mkldnn::normalization_flags::use_global_stats; // 010
flags |= dnnl::normalization_flags::use_global_stats; // 010
if (fuse_with_relu && test_mode)
flags |= mkldnn::normalization_flags::fuse_norm_relu; // 100
flags |= dnnl::normalization_flags::fuse_norm_relu; // 100
auto md = mkldnn::memory::desc(
auto md = dnnl::memory::desc(
src_tz, platform::MKLDNNGetDataType<T>(),
platform::MKLDNNFormatForSize(src_tz.size(), x->format()));
this->AcquireForwardPrimitiveDescriptor(
global_stats == true ? mkldnn::prop_kind::forward_scoring
: mkldnn::prop_kind::forward_training,
global_stats == true ? dnnl::prop_kind::forward_scoring
: dnnl::prop_kind::forward_training,
md, epsilon, flags);
}
BatchNormMKLDNNHandler(const paddle::framework::ExecutionContext &ctx,
const mkldnn::engine mkldnn_engine, const Tensor *in_x,
const dnnl::engine mkldnn_engine, const Tensor *in_x,
const Tensor *scale, const Tensor *out_grad)
: platform::MKLDNNHandlerNoCachingT<T,
mkldnn::batch_normalization_forward,
mkldnn::batch_normalization_backward>(
: platform::MKLDNNHandlerNoCachingT<T, dnnl::batch_normalization_forward,
dnnl::batch_normalization_backward>(
mkldnn_engine, ctx.GetPlace()) {
PADDLE_ENFORCE_EQ(out_grad->layout(), DataLayout::kMKLDNN,
platform::errors::InvalidArgument(
......@@ -112,22 +110,22 @@ class BatchNormMKLDNNHandler : public platform::MKLDNNHandlerNoCachingT<
auto dims = framework::vectorize(in_x->dims());
auto diff_dst_md =
mkldnn::memory::desc(dims, platform::MKLDNNGetDataType<T>(), diff_fmt);
dnnl::memory::desc(dims, platform::MKLDNNGetDataType<T>(), diff_fmt);
auto src_md =
mkldnn::memory::desc(dims, platform::MKLDNNGetDataType<T>(), src_fmt);
dnnl::memory::desc(dims, platform::MKLDNNGetDataType<T>(), src_fmt);
const float epsilon = ctx.Attr<float>("epsilon");
this->AcquireForwardPrimitiveDescriptor(
mkldnn::prop_kind::forward_training, src_md, epsilon,
mkldnn::normalization_flags::use_scale_shift);
dnnl::prop_kind::forward_training, src_md, epsilon,
dnnl::normalization_flags::use_scale_shift);
this->AcquireBackwardPrimitiveDescriptor(
mkldnn::prop_kind::backward, diff_dst_md, src_md, epsilon,
mkldnn::normalization_flags::use_scale_shift);
dnnl::prop_kind::backward, diff_dst_md, src_md, epsilon,
dnnl::normalization_flags::use_scale_shift);
}
std::shared_ptr<mkldnn::memory> AcquireScaleShiftMemory(const Tensor *scale,
const Tensor *shift) {
std::shared_ptr<dnnl::memory> AcquireScaleShiftMemory(const Tensor *scale,
const Tensor *shift) {
auto scale_tz = paddle::framework::vectorize(scale->dims());
const unsigned int C = scale_tz[0];
PADDLE_ENFORCE_EQ(
......@@ -147,34 +145,34 @@ class BatchNormMKLDNNHandler : public platform::MKLDNNHandlerNoCachingT<
return scaleshift_memory;
}
// Returns a memory object for the scale/shift gradients: passes the
// caller-supplied buffer `diff_scaleshift_data` to AcquireMemoryFromPrimitive
// together with the backward primitive descriptor's diff_weights_desc().
// The return-type line is listed twice (mkldnn:: then dnnl:: spelling).
std::shared_ptr<mkldnn::memory> AcquireDiffScaleShiftMemory(
std::shared_ptr<dnnl::memory> AcquireDiffScaleShiftMemory(
T *diff_scaleshift_data) {
return this->AcquireMemoryFromPrimitive(this->bwd_pd_->diff_weights_desc(),
diff_scaleshift_data);
}
// Read-only overload: passes the existing data of `mean` to
// AcquireMemoryFromPrimitive using the forward primitive descriptor's
// mean_desc(). to_void_cast is applied to the const pointer before the call.
// The return-type line is listed twice (mkldnn:: then dnnl:: spelling).
std::shared_ptr<mkldnn::memory> AcquireMeanMemory(
std::shared_ptr<dnnl::memory> AcquireMeanMemory(
const framework::Tensor *mean) {
const T *mean_data = mean->data<T>();
return this->AcquireMemoryFromPrimitive(this->fwd_pd_->mean_desc(),
to_void_cast<T>(mean_data));
}
// Writable overload: requests a mutable buffer from `mean` on this->place_,
// passing the byte size reported by mean_desc().get_size(), then hands that
// buffer to AcquireMemoryFromPrimitive with the same descriptor.
// The signature line is listed twice (mkldnn:: then dnnl:: spelling).
std::shared_ptr<mkldnn::memory> AcquireMeanMemory(framework::Tensor *mean) {
std::shared_ptr<dnnl::memory> AcquireMeanMemory(framework::Tensor *mean) {
T *mean_data = mean->mutable_data<T>(this->place_,
this->fwd_pd_->mean_desc().get_size());
return this->AcquireMemoryFromPrimitive(this->fwd_pd_->mean_desc(),
mean_data);
}
// Read-only overload: passes the existing data of `variance` to
// AcquireMemoryFromPrimitive using the forward primitive descriptor's
// variance_desc(). to_void_cast is applied to the const pointer before the
// call.
// The return-type line is listed twice (mkldnn:: then dnnl:: spelling).
std::shared_ptr<mkldnn::memory> AcquireVarianceMemory(
std::shared_ptr<dnnl::memory> AcquireVarianceMemory(
const framework::Tensor *variance) {
const T *variance_data = variance->data<T>();
return this->AcquireMemoryFromPrimitive(this->fwd_pd_->variance_desc(),
to_void_cast<T>(variance_data));
}
std::shared_ptr<mkldnn::memory> AcquireVarianceMemory(
std::shared_ptr<dnnl::memory> AcquireVarianceMemory(
framework::Tensor *variance) {
T *variance_data = variance->mutable_data<T>(
this->place_, this->fwd_pd_->variance_desc().get_size());
......@@ -233,12 +231,11 @@ class BatchNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
y->set_format(platform::GetMKLDNNFormat(*dst_memory));
auto &astream = platform::MKLDNNDeviceContext::tls().get_stream();
batch_norm_p->execute(astream,
{{MKLDNN_ARG_SRC, *src_memory},
{MKLDNN_ARG_SCALE_SHIFT, *scaleshift_memory},
{MKLDNN_ARG_MEAN, *mean_memory},
{MKLDNN_ARG_VARIANCE, *variance_memory},
{MKLDNN_ARG_DST, *dst_memory}});
batch_norm_p->execute(astream, {{DNNL_ARG_SRC, *src_memory},
{DNNL_ARG_SCALE_SHIFT, *scaleshift_memory},
{DNNL_ARG_MEAN, *mean_memory},
{DNNL_ARG_VARIANCE, *variance_memory},
{DNNL_ARG_DST, *dst_memory}});
astream.wait();
if (!global_stats) {
......@@ -307,13 +304,13 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
auto &astream = platform::MKLDNNDeviceContext::tls().get_stream();
batch_norm_bwd_p->execute(
astream, {{MKLDNN_ARG_SRC, *src_memory},
{MKLDNN_ARG_MEAN, *mean_memory},
{MKLDNN_ARG_VARIANCE, *variance_memory},
{MKLDNN_ARG_DIFF_DST, *diff_dst_memory},
{MKLDNN_ARG_SCALE_SHIFT, *scaleshift_memory},
{MKLDNN_ARG_DIFF_SRC, *diff_src_memory},
{MKLDNN_ARG_DIFF_SCALE_SHIFT, *diff_scaleshift_memory}});
astream, {{DNNL_ARG_SRC, *src_memory},
{DNNL_ARG_MEAN, *mean_memory},
{DNNL_ARG_VARIANCE, *variance_memory},
{DNNL_ARG_DIFF_DST, *diff_dst_memory},
{DNNL_ARG_SCALE_SHIFT, *scaleshift_memory},
{DNNL_ARG_DIFF_SRC, *diff_src_memory},
{DNNL_ARG_DIFF_SCALE_SHIFT, *diff_scaleshift_memory}});
astream.wait();
T *diff_scale_data = diff_scale->mutable_data<T>(ctx.GetPlace());
......
......@@ -34,7 +34,7 @@ class ClipMKLDNNKernel : public paddle::framework::OpKernel<T> {
auto* out = ctx.Output<Tensor>("Out");
paddle::platform::ActivationMKLDNNHandler<T> handler(
mkldnn::algorithm::eltwise_clip_v2, ctx, mkldnn_engine, ctx.GetPlace(),
dnnl::algorithm::eltwise_clip_v2, ctx, mkldnn_engine, ctx.GetPlace(),
x);
auto src_memory_p = handler.AcquireSrcMemory(x);
......@@ -42,8 +42,8 @@ class ClipMKLDNNKernel : public paddle::framework::OpKernel<T> {
auto activation_p = handler.AcquireForwardPrimitive();
auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
activation_p->execute(astream, {{MKLDNN_ARG_FROM, *src_memory_p},
{MKLDNN_ARG_TO, *dst_memory_p}});
activation_p->execute(astream, {{DNNL_ARG_FROM, *src_memory_p},
{DNNL_ARG_TO, *dst_memory_p}});
astream.wait();
out->set_layout(paddle::framework::DataLayout::kMKLDNN);
......@@ -68,8 +68,8 @@ class ClipGradMKLDNNKernel : public paddle::framework::OpKernel<T> {
auto* dout = ctx.Input<Tensor>(paddle::framework::GradVarName("Out"));
paddle::platform::ActivationMKLDNNHandler<T> handler(
mkldnn::algorithm::eltwise_clip_v2, ctx, mkldnn_engine, ctx.GetPlace(),
x, dout);
dnnl::algorithm::eltwise_clip_v2, ctx, mkldnn_engine, ctx.GetPlace(), x,
dout);
auto src_memory_p = handler.AcquireBackwardSrcMemory(x);
auto diff_dst_memory_p = handler.AcquireDiffDstMemory(dout);
......@@ -78,9 +78,9 @@ class ClipGradMKLDNNKernel : public paddle::framework::OpKernel<T> {
auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
activation_backward_p->execute(astream,
{{MKLDNN_ARG_SRC, *src_memory_p},
{MKLDNN_ARG_DIFF_DST, *diff_dst_memory_p},
{MKLDNN_ARG_DIFF_SRC, *diff_src_memory_p}});
{{DNNL_ARG_SRC, *src_memory_p},
{DNNL_ARG_DIFF_DST, *diff_dst_memory_p},
{DNNL_ARG_DIFF_SRC, *diff_src_memory_p}});
astream.wait();
dx->set_layout(paddle::framework::DataLayout::kMKLDNN);
......
......@@ -24,10 +24,10 @@ namespace operators {
using framework::DataLayout;
using framework::Tensor;
using framework::LoDTensor;
using mkldnn::memory;
using mkldnn::primitive;
using mkldnn::concat;
using mkldnn::stream;
using dnnl::memory;
using dnnl::primitive;
using dnnl::concat;
using dnnl::stream;
using platform::to_void_cast;
template <typename T>
......@@ -35,7 +35,7 @@ class ConcatMKLDNNHandler
: public platform::MKLDNNHandlerNoCachingT<T, dnnl::concat> {
public:
ConcatMKLDNNHandler(const framework::ExecutionContext& ctx,
const mkldnn::engine mkldnn_engine,
const dnnl::engine mkldnn_engine,
const std::vector<const Tensor*>& inputs, Tensor* output)
: platform::MKLDNNHandlerNoCachingT<T, dnnl::concat>(mkldnn_engine,
ctx.GetPlace()) {
......@@ -86,7 +86,7 @@ class ConcatMKLDNNHandler
dst_md, concat_axis, srcs_md, this->engine_));
}
std::shared_ptr<mkldnn::memory> AcquireSrcMemory(const Tensor& input, int i) {
std::shared_ptr<dnnl::memory> AcquireSrcMemory(const Tensor& input, int i) {
const T* input_data = input.data<T>();
return this->AcquireMemoryFromPrimitive(this->fwd_pd_->src_desc(i),
to_void_cast<T>(input_data));
......@@ -139,9 +139,9 @@ class ConcatMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
std::unordered_map<int, memory> args;
for (size_t i = 0; i < multi_input.size(); ++i) {
srcs.push_back(handler.AcquireSrcMemory(*(multi_input[i]), i));
args.insert({MKLDNN_ARG_MULTIPLE_SRC + i, *(srcs.at(i))});
args.insert({DNNL_ARG_MULTIPLE_SRC + i, *(srcs.at(i))});
}
args.insert({MKLDNN_ARG_DST, *dst_mem});
args.insert({DNNL_ARG_DST, *dst_mem});
concat_p->execute(astream, args);
astream.wait();
......@@ -185,7 +185,7 @@ class ConcatGradMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
std::vector<int64_t> offset(dout_vec_dims.size(), 0);
mkldnn::memory::data_type dout_type =
dnnl::memory::data_type dout_type =
framework::ToMKLDNNDataType(dout->type());
platform::ReorderMKLDNNHandler reorder_handler(dout_vec_dims, dout->type(),
dout_type, onednn_engine);
......
......@@ -25,8 +25,7 @@ namespace operators {
using Tensor = framework::Tensor;
using framework::DataLayout;
inline mkldnn::memory::dims GetWeightsTz(const Tensor* filter,
const int groups) {
inline dnnl::memory::dims GetWeightsTz(const Tensor* filter, const int groups) {
auto iohw_weights_tz = framework::vectorize(filter->dims());
auto weights_tz = iohw_weights_tz;
......@@ -40,14 +39,13 @@ inline mkldnn::memory::dims GetWeightsTz(const Tensor* filter,
template <typename T, typename K, typename T_out>
class ConvTransposeMKLDNNHandlerT
: public platform::MKLDNNHandlerNoCachingT<T,
mkldnn::deconvolution_forward> {
: public platform::MKLDNNHandlerNoCachingT<T, dnnl::deconvolution_forward> {
public:
ConvTransposeMKLDNNHandlerT(const framework::ExecutionContext& ctx,
const mkldnn::engine mkldnn_engine,
const dnnl::engine mkldnn_engine,
const Tensor* input, const Tensor* filter,
const Tensor* bias, Tensor* output)
: platform::MKLDNNHandlerNoCachingT<T, mkldnn::deconvolution_forward>(
: platform::MKLDNNHandlerNoCachingT<T, dnnl::deconvolution_forward>(
mkldnn_engine, ctx.GetPlace()),
is_test_(ctx.Attr<bool>("is_test")) {
PADDLE_ENFORCE_EQ(is_test_, true,
......@@ -103,13 +101,13 @@ class ConvTransposeMKLDNNHandlerT
}
std::vector<int> strides_temp = ctx.Attr<std::vector<int>>("strides");
mkldnn::memory::dims strides(begin(strides_temp), end(strides_temp));
dnnl::memory::dims strides(begin(strides_temp), end(strides_temp));
std::vector<int> paddings_temp = ctx.Attr<std::vector<int>>("paddings");
mkldnn::memory::dims paddings(begin(paddings_temp), end(paddings_temp));
dnnl::memory::dims paddings(begin(paddings_temp), end(paddings_temp));
std::vector<int> dilations_temp = ctx.Attr<std::vector<int>>("dilations");
mkldnn::memory::dims dilations(begin(dilations_temp), end(dilations_temp));
dnnl::memory::dims dilations(begin(dilations_temp), end(dilations_temp));
int groups = ctx.Attr<int>("groups");
std::string padding_algorithm = ctx.Attr<std::string>("padding_algorithm");
......@@ -149,10 +147,10 @@ class ConvTransposeMKLDNNHandlerT
const float fuse_alpha = ctx.Attr<float>("fuse_alpha");
const float fuse_beta = ctx.Attr<float>("fuse_beta");
auto data_type = mkldnn::memory::data_type::f32;
auto data_type = dnnl::memory::data_type::f32;
if (ctx.Attr<std::string>("mkldnn_data_type") == "bfloat16" ||
std::is_same<T_out, platform::bfloat16>::value)
data_type = mkldnn::memory::data_type::bf16;
data_type = dnnl::memory::data_type::bf16;
const auto src_md =
platform::MKLDNNMemDesc(src_tz, data_type, chosen_memory_format);
......@@ -161,10 +159,10 @@ class ConvTransposeMKLDNNHandlerT
const auto dst_md = platform::MKLDNNMemDesc(
dst_tz, platform::MKLDNNGetDataType<T_out>(), chosen_memory_format);
const mkldnn::primitive_attr conv_trans_attr =
const dnnl::primitive_attr conv_trans_attr =
CreatePostOps(fuse_activation, fuse_alpha, fuse_beta);
auto fwd_prop_kind = is_test_ ? mkldnn::prop_kind::forward_inference
: mkldnn::prop_kind::forward_training;
auto fwd_prop_kind = is_test_ ? dnnl::prop_kind::forward_inference
: dnnl::prop_kind::forward_training;
if (bias) {
std::vector<int64_t> bias_tz = framework::vectorize(bias->dims());
const auto bias_md =
......@@ -181,44 +179,43 @@ class ConvTransposeMKLDNNHandlerT
}
}
// Builds the primitive attributes carrying the fused-activation post-op.
//   fuse_activation: "relu"/"leaky_relu", "relu6" or "swish" select an
//                    eltwise post-op; any other value appends nothing.
//   fuse_alpha, fuse_beta: forwarded unchanged to append_eltwise.
// Returns a primitive_attr whose post-ops list holds at most one eltwise op.
// NOTE(review): in this listing each changed statement appears twice, the
// mkldnn:: spelling followed by the dnnl:: spelling introduced by the commit.
mkldnn::primitive_attr CreatePostOps(const std::string& fuse_activation,
const float& fuse_alpha,
const float& fuse_beta) {
mkldnn::primitive_attr conv_attr;
mkldnn::post_ops post_operations;
dnnl::primitive_attr CreatePostOps(const std::string& fuse_activation,
const float& fuse_alpha,
const float& fuse_beta) {
dnnl::primitive_attr conv_attr;
dnnl::post_ops post_operations;
// Activation fusion is executed through the PostOps feature: append one
// eltwise operation matching fuse_activation. All branches use scale 1.0f.
if (fuse_activation == "relu" || fuse_activation == "leaky_relu") {
// Both names map to eltwise_relu; they differ only in the fuse_alpha the
// caller passes in.
constexpr float scale = 1.0f;
post_operations.append_eltwise(scale, mkldnn::algorithm::eltwise_relu,
post_operations.append_eltwise(scale, dnnl::algorithm::eltwise_relu,
fuse_alpha, fuse_beta);
} else if (fuse_activation == "relu6") {
constexpr float scale = 1.0f;
post_operations.append_eltwise(scale,
mkldnn::algorithm::eltwise_bounded_relu,
fuse_alpha, fuse_beta);
post_operations.append_eltwise(
scale, dnnl::algorithm::eltwise_bounded_relu, fuse_alpha, fuse_beta);
} else if (fuse_activation == "swish") {
constexpr float scale = 1.0f;
post_operations.append_eltwise(scale, mkldnn::algorithm::eltwise_swish,
post_operations.append_eltwise(scale, dnnl::algorithm::eltwise_swish,
fuse_alpha, fuse_beta);
}
conv_attr.set_post_ops(post_operations);
return conv_attr;
}
// Returns source memory for `input`. A user-side descriptor is built from the
// tensor's dims, the oneDNN data type for T and the tensor's format; it is
// passed with the forward primitive descriptor's src_desc() and the tensor
// data to the base class's AcquireMemoryWithReorder.
// NOTE(review): the call is qualified with the base class name, presumably to
// bypass the AcquireMemoryWithReorder member template declared in this
// class — confirm.
// The return type and the qualified call are each listed twice (mkldnn:: then
// dnnl:: spelling).
std::shared_ptr<mkldnn::memory> AcquireSrcMemoryWithReorder(
std::shared_ptr<dnnl::memory> AcquireSrcMemoryWithReorder(
const framework::Tensor* input) {
const T* input_data = input->data<T>();
auto user_src_md = platform::MKLDNNMemDesc(
framework::vectorize(input->dims()), platform::MKLDNNGetDataType<T>(),
input->format());
return platform::MKLDNNHandlerNoCachingT<T, mkldnn::deconvolution_forward>::
return platform::MKLDNNHandlerNoCachingT<T, dnnl::deconvolution_forward>::
AcquireMemoryWithReorder(user_src_md, this->fwd_pd_->src_desc(),
platform::to_void_cast<T>(input_data));
}
std::shared_ptr<mkldnn::memory> AcquireWeightsMemoryWithReorder(
std::shared_ptr<dnnl::memory> AcquireWeightsMemoryWithReorder(
const platform::MKLDNNDeviceContext& dev_ctx, const std::string& key,
const framework::Tensor* filter, const int& groups) {
const K* filter_data = filter->data<K>();
......@@ -236,12 +233,12 @@ class ConvTransposeMKLDNNHandlerT
}
template <typename F = T>
std::shared_ptr<mkldnn::memory> AcquireMemoryWithReorder(
std::shared_ptr<dnnl::memory> AcquireMemoryWithReorder(
const platform::MKLDNNDeviceContext& dev_ctx,
const mkldnn::memory::desc& user_md,
const mkldnn::memory::desc& target_md, void* ptr, const std::string& key,
const std::string& suffix, bool is_persistent = false,
const std::vector<float>& scale_data = {1.0f}, int mask = 0) {
const dnnl::memory::desc& user_md, const dnnl::memory::desc& target_md,
void* ptr, const std::string& key, const std::string& suffix,
bool is_persistent = false, const std::vector<float>& scale_data = {1.0f},
int mask = 0) {
const auto target_key = key + suffix + "_target";
const auto key_reorder_p = key + suffix + "reorder_p";
const auto user_key = key + suffix + "_user";
......@@ -254,7 +251,7 @@ class ConvTransposeMKLDNNHandlerT
std::make_shared<dnnl::memory>(user_md, this->engine_, ptr);
if (user_md != target_md) {
target_memory_p =
std::make_shared<mkldnn::memory>(target_md, this->engine_);
std::make_shared<dnnl::memory>(target_md, this->engine_);
dnnl::reorder::primitive_desc reorder_pdesc;
if (platform::is_int8<T>()) {
dnnl::primitive_attr attr;
......@@ -271,8 +268,8 @@ class ConvTransposeMKLDNNHandlerT
auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p},
{MKLDNN_ARG_TO, *target_memory_p}});
reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p},
{DNNL_ARG_TO, *target_memory_p}});
astream.wait();
} else {
target_memory_p = user_memory_p;
......@@ -288,20 +285,20 @@ class ConvTransposeMKLDNNHandlerT
// TODO(jczaja): A cached reorder primitive means that a reorder is needed.
// This should be reworked to get rid of the keys.
auto reorder_p = std::static_pointer_cast<mkldnn::reorder>(
auto reorder_p = std::static_pointer_cast<dnnl::reorder>(
dev_ctx.GetBlob(key_reorder_p));
if (reorder_p != nullptr) {
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p},
{MKLDNN_ARG_TO, *target_memory_p}});
reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p},
{DNNL_ARG_TO, *target_memory_p}});
astream.wait();
}
}
return target_memory_p;
}
std::shared_ptr<mkldnn::memory> AcquireBiasMemoryWithReorder(
std::shared_ptr<dnnl::memory> AcquireBiasMemoryWithReorder(
const platform::MKLDNNDeviceContext& dev_ctx, const std::string& key,
const framework::Tensor* bias) {
const K* bias_data = bias->data<K>();
......@@ -364,14 +361,14 @@ class ConvTransposeMKLDNNOpKernel : public framework::OpKernel<T> {
auto conv_p = handler.AcquireForwardPrimitive();
std::unordered_map<int, dnnl::memory> args = {
{MKLDNN_ARG_SRC, *src_memory_p},
{MKLDNN_ARG_WEIGHTS, *weights_memory_p},
{MKLDNN_ARG_DST, *dst_memory_p}};
{DNNL_ARG_SRC, *src_memory_p},
{DNNL_ARG_WEIGHTS, *weights_memory_p},
{DNNL_ARG_DST, *dst_memory_p}};
if (bias) {
auto bias_memory_p =
handler.AcquireBiasMemoryWithReorder(dev_ctx, key, bias);
args.insert({MKLDNN_ARG_BIAS, *bias_memory_p});
args.insert({DNNL_ARG_BIAS, *bias_memory_p});
}
auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
conv_p->execute(astream, args);
......
......@@ -38,16 +38,16 @@ using framework::ExecutionContext;
using platform::MKLDNNDeviceContext;
using platform::to_void_cast;
using platform::GetMKLDNNFormat;
using mkldnn::memory;
using mkldnn::inner_product_forward;
using mkldnn::primitive;
using mkldnn::stream;
using mkldnn::prop_kind;
using dnnl::memory;
using dnnl::inner_product_forward;
using dnnl::primitive;
using dnnl::stream;
using dnnl::prop_kind;
template <typename T_in, typename T_w, typename T_out>
class FCPrimitiveFactory {
public:
// Constructs the factory and initializes engine_ from `engine`.
// The constructor is listed twice (mkldnn:: then dnnl:: spelling).
explicit FCPrimitiveFactory(const mkldnn::engine& engine) : engine_(engine) {}
explicit FCPrimitiveFactory(const dnnl::engine& engine) : engine_(engine) {}
void ExecuteFcPrimitive(const LoDTensor* input, const Tensor* weights,
const Tensor* bias, LoDTensor* output,
......@@ -89,8 +89,7 @@ class FCPrimitiveFactory {
// descriptor has been divided into separate cases, based on the number
// of input dimensions.
size_t input_dim_num = input->dims().size();
paddle::optional<mkldnn::inner_product_forward::primitive_desc>
fc_prim_desc;
paddle::optional<dnnl::inner_product_forward::primitive_desc> fc_prim_desc;
memory::desc usr_weights_desc = {};
switch (input_dim_num) {
case 2:
......@@ -140,14 +139,14 @@ class FCPrimitiveFactory {
// Executes the stored fc_ primitive on the stream obtained from
// MKLDNNDeviceContext::tls(), then calls astream.wait().
// The BIAS argument is passed only when bias_ is set; SRC, WEIGHTS and DST
// are always passed.
// NOTE(review): each execute() call is listed twice, with MKLDNN_ARG_* macros
// and then with the DNNL_ARG_* macros introduced by the commit.
void Execute() {
auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
if (bias_) {
fc_->execute(astream, {{MKLDNN_ARG_SRC, *input_},
{MKLDNN_ARG_WEIGHTS, *weights_},
{MKLDNN_ARG_BIAS, *bias_},
{MKLDNN_ARG_DST, *output_}});
fc_->execute(astream, {{DNNL_ARG_SRC, *input_},
{DNNL_ARG_WEIGHTS, *weights_},
{DNNL_ARG_BIAS, *bias_},
{DNNL_ARG_DST, *output_}});
} else {
fc_->execute(astream, {{MKLDNN_ARG_SRC, *input_},
{MKLDNN_ARG_WEIGHTS, *weights_},
{MKLDNN_ARG_DST, *output_}});
fc_->execute(astream, {{DNNL_ARG_SRC, *input_},
{DNNL_ARG_WEIGHTS, *weights_},
{DNNL_ARG_DST, *output_}});
}
astream.wait();
}
......@@ -192,7 +191,7 @@ class FCPrimitiveFactory {
}
}
mkldnn::inner_product_forward::primitive_desc Create2DFcPrimDescriptor(
dnnl::inner_product_forward::primitive_desc Create2DFcPrimDescriptor(
const LoDTensor* input, const Tensor* weights, const Tensor* bias,
LoDTensor* output, const ExecutionContext& ctx) {
auto src_desc = CreateMemDescriptor<T_in>(input, input->format());
......@@ -213,7 +212,7 @@ class FCPrimitiveFactory {
// Returns the descriptor of the stored weights_ memory unchanged.
memory::desc Create2DUserWeightsDesc() { return weights_->get_desc(); }
mkldnn::inner_product_forward::primitive_desc Create3DFcPrimDescriptor(
dnnl::inner_product_forward::primitive_desc Create3DFcPrimDescriptor(
const LoDTensor* input, const Tensor* weights, const Tensor* bias,
LoDTensor* output, const ExecutionContext& ctx) {
auto input_dims = framework::vectorize(input->dims());
......@@ -244,7 +243,7 @@ class FCPrimitiveFactory {
return CreateMemDescriptor<float>(dims, MKLDNNMemoryFormat::oiw);
}
mkldnn::inner_product_forward::primitive_desc Create4DFcPrimDescriptor(
dnnl::inner_product_forward::primitive_desc Create4DFcPrimDescriptor(
const LoDTensor* input, const Tensor* weights, const Tensor* bias,
LoDTensor* output, const ExecutionContext& ctx) {
auto src_desc = CreateMemDescriptor<T_in>(input, input->format());
......@@ -274,13 +273,13 @@ class FCPrimitiveFactory {
}
// Convert data from one data format to another
std::shared_ptr<mkldnn::memory> Reorder(const memory::desc& src_desc,
const memory::desc& dst_desc,
void* src_data) {
std::shared_ptr<dnnl::memory> Reorder(const memory::desc& src_desc,
const memory::desc& dst_desc,
void* src_data) {
auto src_mem = memory(src_desc, engine_, src_data);
auto dst_mem = std::make_shared<memory>(dst_desc, engine_);
auto reorder = mkldnn::reorder(src_mem, *dst_mem);
auto reorder = dnnl::reorder(src_mem, *dst_mem);
auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
{
......@@ -295,11 +294,11 @@ class FCPrimitiveFactory {
// Convert data from one data format to another and rescale it.
// If the desired data type is (un)signed int8, quantization occurs here.
std::shared_ptr<mkldnn::memory> ReorderWithScale(
std::shared_ptr<dnnl::memory> ReorderWithScale(
const std::shared_ptr<memory> src_mem, const memory::desc& dst_md,
const std::vector<float>& scale_data) {
auto dst_mem = std::make_shared<mkldnn::memory>(dst_md, engine_);
mkldnn::primitive_attr attributes;
auto dst_mem = std::make_shared<dnnl::memory>(dst_md, engine_);
dnnl::primitive_attr attributes;
// According to MKL-DNN's documentation mask determines along which
// dimensions should the scale be applied.
// 0 - Single scale applied to whole tensor
......@@ -308,14 +307,14 @@ class FCPrimitiveFactory {
// because we perform per-output-channel quantization
int mask = CreateMask(0, scale_data.size() > 1);
attributes.set_output_scales(mask, scale_data);
auto reorder = mkldnn::reorder(*src_mem, *dst_mem, attributes);
auto reorder = dnnl::reorder(*src_mem, *dst_mem, attributes);
auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
{
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
reorder.execute(astream,
{{MKLDNN_ARG_FROM, *src_mem}, {MKLDNN_ARG_TO, *dst_mem}});
{{DNNL_ARG_FROM, *src_mem}, {DNNL_ARG_TO, *dst_mem}});
astream.wait();
}
......@@ -323,43 +322,43 @@ class FCPrimitiveFactory {
}
// Builds a memory descriptor for the given dimensions and memory format.
// The element data type is not passed in; it is derived from the template
// parameter T through platform::MKLDNNGetDataType<T>().
// NOTE(review): this listing shows both the pre-rename (mkldnn::) and the
// post-rename (dnnl::) signature line of the same function.
template <typename T>
static mkldnn::memory::desc CreateMemDescriptor(
static dnnl::memory::desc CreateMemDescriptor(
const std::vector<int64_t>& dims, MKLDNNMemoryFormat format) {
return platform::MKLDNNMemDesc(dims, platform::MKLDNNGetDataType<T>(),
format);
}
// Tensor-based overload: reads the dimensions from `tensor`
// (framework::vectorize(tensor->dims())) and forwards them, together with
// `format`, to the dims-based CreateMemDescriptor<T>.
// Only the tensor's dims are read here; its data and its own format are not.
// NOTE(review): this listing shows both the pre-rename (mkldnn::) and the
// post-rename (dnnl::) signature lines of the same function.
template <typename T>
static mkldnn::memory::desc CreateMemDescriptor(const Tensor* tensor,
MKLDNNMemoryFormat format) {
static dnnl::memory::desc CreateMemDescriptor(const Tensor* tensor,
MKLDNNMemoryFormat format) {
auto dims = framework::vectorize(tensor->dims());
return CreateMemDescriptor<T>(dims, format);
}
// Tensor-based overload: takes the tensor's data<T>() pointer, converts it
// with platform::to_void_cast<T>, and forwards it with `desc` to the
// void*-based CreateMemory overload.
// NOTE(review): this listing shows both the pre-rename (mkldnn::) and the
// post-rename (dnnl::) signature lines of the same function.
template <typename T>
mkldnn::memory CreateMemory(const mkldnn::memory::desc& desc,
const Tensor* tensor) {
dnnl::memory CreateMemory(const dnnl::memory::desc& desc,
const Tensor* tensor) {
return CreateMemory(desc, platform::to_void_cast<T>(tensor->data<T>()));
}
// Constructs and returns (by value) a memory object from `desc`, the
// factory's engine_ and the caller-supplied `data` pointer.
// NOTE(review): presumably `data` is used as the memory's handle without
// being copied, so it must stay valid while the memory is in use — confirm
// against the oneDNN memory constructor documentation.
// NOTE(review): this listing shows both the pre-rename (mkldnn::) and the
// post-rename (dnnl::) signature line of the same function.
mkldnn::memory CreateMemory(const mkldnn::memory::desc& desc, void* data) {
dnnl::memory CreateMemory(const dnnl::memory::desc& desc, void* data) {
return memory(desc, engine_, data);
}
// Tensor-based overload: takes the tensor's data<T>() pointer, converts it
// with platform::to_void_cast<T>, and forwards it with `desc` to the
// void*-based CreateMemoryToBeCached overload, which returns a shared_ptr.
// NOTE(review): this listing shows both the pre-rename (mkldnn::) and the
// post-rename (dnnl::) signature lines of the same function.
template <typename T>
std::shared_ptr<mkldnn::memory> CreateMemoryToBeCached(
const mkldnn::memory::desc& desc, const Tensor* tensor) {
std::shared_ptr<dnnl::memory> CreateMemoryToBeCached(
const dnnl::memory::desc& desc, const Tensor* tensor) {
return CreateMemoryToBeCached(desc,
platform::to_void_cast<T>(tensor->data<T>()));
}
// Same construction as CreateMemory(desc, data) — a memory built from `desc`,
// engine_ and `data` — but allocated with std::make_shared and returned as a
// shared_ptr instead of by value.
// NOTE(review): the name suggests the result is meant to be kept in a cache;
// nothing in this function does the caching — verify at the call sites.
// NOTE(review): this listing shows both the pre-rename (mkldnn::) and the
// post-rename (dnnl::) signature lines of the same function.
std::shared_ptr<mkldnn::memory> CreateMemoryToBeCached(
const mkldnn::memory::desc& desc, void* data) {
std::shared_ptr<dnnl::memory> CreateMemoryToBeCached(
const dnnl::memory::desc& desc, void* data) {
return std::make_shared<memory>(desc, engine_, data);
}
// Create weights memory and transform to default MKL-DNN format
std::shared_ptr<mkldnn::memory> CreateWeightsMemory(const Tensor* weights) {
std::shared_ptr<dnnl::memory> CreateWeightsMemory(const Tensor* weights) {
auto dims = framework::vectorize(weights->dims());
std::swap(dims[0], dims[1]); // Correct output dimensions
auto src_desc = CreateMemDescriptor<float>(dims, MKLDNNMemoryFormat::io);
......@@ -446,9 +445,9 @@ class FCPrimitiveFactory {
}
// Fuse relu into FC with activation type attribute has been set to 'relu'
mkldnn::primitive_attr CreatePostOps(const ExecutionContext& ctx) {
mkldnn::primitive_attr attributes;
mkldnn::post_ops post_operations;
dnnl::primitive_attr CreatePostOps(const ExecutionContext& ctx) {
dnnl::primitive_attr attributes;
dnnl::post_ops post_operations;
auto output_shift_scale = ComputeOutputShiftScale(ctx);
int mask = CreateMask(1, output_shift_scale.size() > 1);
......@@ -458,56 +457,55 @@ class FCPrimitiveFactory {
constexpr float scale = 1.0f;
constexpr float negative_slope = 0.0f;
constexpr float placeholder = 1.0f; // beta
post_operations.append_eltwise(scale, mkldnn::algorithm::eltwise_relu,
post_operations.append_eltwise(scale, dnnl::algorithm::eltwise_relu,
negative_slope, placeholder);
} else if (ctx.Attr<std::string>("activation_type") == "gelu") {
constexpr float scale = 1.0f;
constexpr float alpha = 0.0f;
constexpr float beta = 0.0f;
post_operations.append_eltwise(scale, mkldnn::algorithm::eltwise_gelu,
post_operations.append_eltwise(scale, dnnl::algorithm::eltwise_gelu,
alpha, beta);
} else if (ctx.Attr<std::string>("activation_type") == "gelu_tanh") {
constexpr float scale = 1.0f;
constexpr float alpha = 0.0f;
constexpr float beta = 0.0f;
post_operations.append_eltwise(
scale, mkldnn::algorithm::eltwise_gelu_tanh, alpha, beta);
post_operations.append_eltwise(scale, dnnl::algorithm::eltwise_gelu_tanh,
alpha, beta);
} else if (ctx.Attr<std::string>("activation_type") == "gelu_erf") {
constexpr float scale = 1.0f;
constexpr float alpha = 0.0f;
constexpr float beta = 0.0f;
post_operations.append_eltwise(scale, mkldnn::algorithm::eltwise_gelu_erf,
post_operations.append_eltwise(scale, dnnl::algorithm::eltwise_gelu_erf,
alpha, beta);
} else if (ctx.Attr<std::string>("activation_type") == "tanh") {
constexpr float scale = 1.0f;
constexpr float alpha = 0.0f;
constexpr float beta = 0.0f;
post_operations.append_eltwise(scale, mkldnn::algorithm::eltwise_tanh,
post_operations.append_eltwise(scale, dnnl::algorithm::eltwise_tanh,
alpha, beta);
} else if (ctx.Attr<std::string>("activation_type") == "sigmoid") {
constexpr float scale = 1.0f;
constexpr float alpha = 0.0f;
constexpr float beta = 0.0f;
post_operations.append_eltwise(scale, mkldnn::algorithm::eltwise_logistic,
post_operations.append_eltwise(scale, dnnl::algorithm::eltwise_logistic,
alpha, beta);
} else if (ctx.Attr<std::string>("activation_type") == "hard_swish") {
constexpr float scale = 1.0f;
constexpr float alpha = 0.0f;
constexpr float beta = 0.0f;
post_operations.append_eltwise(
scale, mkldnn::algorithm::eltwise_hardswish, alpha, beta);
post_operations.append_eltwise(scale, dnnl::algorithm::eltwise_hardswish,
alpha, beta);
}
attributes.set_post_ops(post_operations);
return attributes;
}
mkldnn::inner_product_forward::primitive_desc CreateFcPrimDesc(
const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& bias_desc,
const mkldnn::memory::desc& dst_desc,
const mkldnn::primitive_attr& attrs) {
dnnl::inner_product_forward::primitive_desc CreateFcPrimDesc(
const dnnl::memory::desc& input_desc,
const dnnl::memory::desc& weights_desc,
const dnnl::memory::desc& bias_desc, const dnnl::memory::desc& dst_desc,
const dnnl::primitive_attr& attrs) {
auto fc_desc =
inner_product_forward::desc(prop_kind::forward_scoring, input_desc,
weights_desc, bias_desc, dst_desc);
......@@ -517,8 +515,8 @@ class FCPrimitiveFactory {
// Create output memory based on output tensor and inner_product
// primitive descriptor format chosen for output
mkldnn::memory CreateDstMemory(
const mkldnn::inner_product_forward::primitive_desc& fc_prim_desc,
dnnl::memory CreateDstMemory(
const dnnl::inner_product_forward::primitive_desc& fc_prim_desc,
const ExecutionContext& ctx, Tensor* output) {
auto dst_desc = fc_prim_desc.dst_desc();
auto buffer_size = dst_desc.get_size();
......@@ -545,7 +543,7 @@ class FCPrimitiveFactory {
}
private:
const mkldnn::engine& engine_;
const dnnl::engine& engine_;
paddle::optional<memory> input_;
paddle::optional<memory> output_;
std::shared_ptr<memory> bias_;
......
......@@ -22,16 +22,16 @@ using paddle::platform::MKLDNNDeviceContext;
template <typename T>
class LRNMKLDNNHandler
: public platform::MKLDNNHandlerNoCachingT<T, mkldnn::lrn_forward,
mkldnn::lrn_backward> {
: public platform::MKLDNNHandlerNoCachingT<T, dnnl::lrn_forward,
dnnl::lrn_backward> {
public:
LRNMKLDNNHandler(const framework::ExecutionContext& ctx,
const mkldnn::engine mkldnn_engine,
platform::Place cpu_place, const Tensor* input)
const dnnl::engine mkldnn_engine, platform::Place cpu_place,
const Tensor* input)
: platform::MKLDNNHandlerNoCachingT<T, mkldnn::lrn_forward,
mkldnn::lrn_backward>(mkldnn_engine,
cpu_place) {
: platform::MKLDNNHandlerNoCachingT<T, dnnl::lrn_forward,
dnnl::lrn_backward>(mkldnn_engine,
cpu_place) {
const int n = ctx.Attr<int>("n");
// MKL-DNN implements LRN in a caffe way:
// http://caffe.berkeleyvision.org/tutorial/layers/lrn.html
......@@ -46,22 +46,22 @@ class LRNMKLDNNHandler
auto dims = framework::vectorize(input->dims());
auto src_md = mkldnn::memory::desc(dims, platform::MKLDNNGetDataType<T>(),
input->format());
auto src_md = dnnl::memory::desc(dims, platform::MKLDNNGetDataType<T>(),
input->format());
this->AcquireForwardPrimitiveDescriptor(
is_test ? mkldnn::prop_kind::forward_inference
: mkldnn::prop_kind::forward_training,
mkldnn::algorithm::lrn_across_channels, src_md, n, alpha, beta, k);
is_test ? dnnl::prop_kind::forward_inference
: dnnl::prop_kind::forward_training,
dnnl::algorithm::lrn_across_channels, src_md, n, alpha, beta, k);
}
LRNMKLDNNHandler(const framework::ExecutionContext& ctx,
const mkldnn::engine mkldnn_engine,
platform::Place cpu_place, const Tensor* in_x,
const Tensor* out_grad, Tensor* in_x_grad)
: platform::MKLDNNHandlerNoCachingT<T, mkldnn::lrn_forward,
mkldnn::lrn_backward>(mkldnn_engine,
cpu_place) {
const dnnl::engine mkldnn_engine, platform::Place cpu_place,
const Tensor* in_x, const Tensor* out_grad,
Tensor* in_x_grad)
: platform::MKLDNNHandlerNoCachingT<T, dnnl::lrn_forward,
dnnl::lrn_backward>(mkldnn_engine,
cpu_place) {
PADDLE_ENFORCE_EQ(
ctx.Attr<bool>("is_test"), false,
platform::errors::PreconditionNotMet(
......@@ -74,28 +74,28 @@ class LRNMKLDNNHandler
auto dims = framework::vectorize<int64_t>(in_x->dims());
auto src_md = mkldnn::memory::desc(dims, platform::MKLDNNGetDataType<T>(),
in_x->format());
auto diff_md = mkldnn::memory::desc(dims, platform::MKLDNNGetDataType<T>(),
out_grad->format());
auto src_md = dnnl::memory::desc(dims, platform::MKLDNNGetDataType<T>(),
in_x->format());
auto diff_md = dnnl::memory::desc(dims, platform::MKLDNNGetDataType<T>(),
out_grad->format());
this->AcquireForwardPrimitiveDescriptor(
mkldnn::prop_kind::forward_training,
mkldnn::algorithm::lrn_across_channels, src_md, n, alpha, beta, k);
dnnl::prop_kind::forward_training, dnnl::algorithm::lrn_across_channels,
src_md, n, alpha, beta, k);
this->AcquireBackwardPrimitiveDescriptor(
mkldnn::algorithm::lrn_across_channels, src_md, diff_md, n, alpha, beta,
dnnl::algorithm::lrn_across_channels, src_md, diff_md, n, alpha, beta,
k);
}
// Obtains a mutable T buffer from `workspace` on this->place_, sized by the
// forward primitive descriptor's workspace_desc().get_size(), and returns a
// memory object for that workspace descriptor created by
// AcquireMemoryFromPrimitive with the buffer pointer.
// Requires this->fwd_pd_ to be set already; it is dereferenced unconditionally.
// NOTE(review): this listing shows both the pre-rename (mkldnn::) and the
// post-rename (dnnl::) signature line of the same function.
std::shared_ptr<mkldnn::memory> AcquireWorkspaceMemory(Tensor* workspace) {
std::shared_ptr<dnnl::memory> AcquireWorkspaceMemory(Tensor* workspace) {
T* ptr = workspace->mutable_data<T>(
this->place_, this->fwd_pd_->workspace_desc().get_size());
return this->AcquireMemoryFromPrimitive(this->fwd_pd_->workspace_desc(),
ptr);
}
std::shared_ptr<mkldnn::memory> AcquireBackwardWorkspaceMemory(
std::shared_ptr<dnnl::memory> AcquireBackwardWorkspaceMemory(
const Tensor* workspace) {
const T* workspace_data = workspace->data<T>();
return this->AcquireMemoryFromPrimitive(
......@@ -136,12 +136,12 @@ class LRNMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
if (!workspace_memory->get_desc().is_zero()) {
mid->set_format(platform::GetMKLDNNFormat(*workspace_memory));
lrn_p->execute(astream, {{MKLDNN_ARG_SRC, *src_memory},
{MKLDNN_ARG_DST, *dst_memory},
{MKLDNN_ARG_WORKSPACE, *workspace_memory}});
lrn_p->execute(astream, {{DNNL_ARG_SRC, *src_memory},
{DNNL_ARG_DST, *dst_memory},
{DNNL_ARG_WORKSPACE, *workspace_memory}});
} else {
lrn_p->execute(astream, {{MKLDNN_ARG_SRC, *src_memory},
{MKLDNN_ARG_DST, *dst_memory}});
lrn_p->execute(
astream, {{DNNL_ARG_SRC, *src_memory}, {DNNL_ARG_DST, *dst_memory}});
}
astream.wait();
......@@ -182,10 +182,10 @@ class LRNMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
auto lrn_bwd = handler.AcquireBackwardPrimitive();
auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
lrn_bwd->execute(astream, {{MKLDNN_ARG_SRC, *src_memory},
{MKLDNN_ARG_DIFF_DST, *diff_dst_memory},
{MKLDNN_ARG_DIFF_SRC, *diff_src_memory},
{MKLDNN_ARG_WORKSPACE, *workspace}});
lrn_bwd->execute(astream, {{DNNL_ARG_SRC, *src_memory},
{DNNL_ARG_DIFF_DST, *diff_dst_memory},
{DNNL_ARG_DIFF_SRC, *diff_src_memory},
{DNNL_ARG_WORKSPACE, *workspace}});
astream.wait();
in_x_grad->set_layout(framework::DataLayout::kMKLDNN);
......
......@@ -108,7 +108,7 @@ template <typename XT, typename YT, typename OT>
class MatMulMKLDNNHandler
: public paddle::platform::MKLDNNHandlerNoCachingT<XT, dnnl::matmul> {
public:
MatMulMKLDNNHandler(const mkldnn::engine engine,
MatMulMKLDNNHandler(const dnnl::engine engine,
paddle::platform::Place cpu_place, Tensor* x,
bool trans_x, Tensor* y, bool trans_y, Tensor* out,
float scale)
......@@ -148,7 +148,7 @@ class MatMulMKLDNNHandler
this->AcquireForwardPrimitiveDescriptor(attrs, x_md, y_md, out_md);
}
// Constructor for FWD MatMul
MatMulMKLDNNHandler(const mkldnn::engine engine, const ExecutionContext& ctx,
MatMulMKLDNNHandler(const dnnl::engine engine, const ExecutionContext& ctx,
float scale)
: paddle::platform::MKLDNNHandlerNoCachingT<XT, dnnl::matmul>(
engine, ctx.GetPlace()),
......@@ -202,9 +202,9 @@ class MatMulMKLDNNHandler
weights_memory_p->set_data_handle(y_ptr);
dst_memory_p->set_data_handle(out_ptr);
matmul_p->execute(astream, {
{MKLDNN_ARG_SRC, *src_memory_p},
{MKLDNN_ARG_WEIGHTS, *weights_memory_p},
{MKLDNN_ARG_DST, *dst_memory_p},
{DNNL_ARG_SRC, *src_memory_p},
{DNNL_ARG_WEIGHTS, *weights_memory_p},
{DNNL_ARG_DST, *dst_memory_p},
});
x_ptr = static_cast<char*>(x_ptr) + std::get<0>(offsets);
y_ptr = static_cast<char*>(y_ptr) + std::get<1>(offsets);
......@@ -218,7 +218,7 @@ class MatMulMKLDNNHandler
out->set_layout(DataLayout::kMKLDNN);
}
std::shared_ptr<mkldnn::memory> AcquireDstMemory(
std::shared_ptr<dnnl::memory> AcquireDstMemory(
paddle::framework::Tensor* output) {
// We cannot use base AcquireDstMemory as it makes an allocation request
// base on DST memory primitive size. This is fine in general, but in MatMul
......@@ -548,7 +548,7 @@ void MatMulGradMKLDNNKernel<T>::Compute(const ExecutionContext& ctx) const {
template <typename T>
void MatMulGradMKLDNNKernel<T>::ExecuteMatMulGrad(
const ExecutionContext& ctx, const MKLDNNDeviceContext& dev_ctx,
const mkldnn::engine& engine, Tensor* x, bool trans_x,
const dnnl::engine& engine, Tensor* x, bool trans_x,
bool is_fold_init_dims_x, Tensor* y, bool trans_y, bool is_fold_init_dims_y,
Tensor* out) const {
// gradient is calculated in a different way when broadcasting is used
......
......@@ -33,18 +33,17 @@ using framework::DataLayout;
using framework::DDim;
using framework::ExecutionContext;
using framework::Tensor;
using mkldnn::inner_product_forward;
using mkldnn::memory;
using mkldnn::prop_kind;
using mkldnn::stream;
using dnnl::inner_product_forward;
using dnnl::memory;
using dnnl::prop_kind;
using dnnl::stream;
using platform::MKLDNNDeviceContext;
using platform::to_void_cast;
template <typename XT, typename YT, typename OT>
class MulPrimitiveFactory {
public:
// Binds the factory to `engine`. engine_ is declared as a const reference
// member of this class, so the engine is not copied and must outlive the
// factory.
// NOTE(review): this listing shows both the pre-rename (mkldnn::, two lines)
// and the post-rename (dnnl::, one line) form of the same constructor.
explicit MulPrimitiveFactory(const mkldnn::engine &engine)
: engine_(engine) {}
explicit MulPrimitiveFactory(const dnnl::engine &engine) : engine_(engine) {}
inner_product_forward CreateMulPrimitive(const Tensor *x_input,
const Tensor *y_input,
......@@ -99,15 +98,15 @@ class MulPrimitiveFactory {
const memory::desc &dst_desc, void *src_data,
const std::vector<float> &scale) {
auto mask = scale.size() > 1 ? 1 : 0;
mkldnn::primitive_attr attr;
dnnl::primitive_attr attr;
attr.set_output_scales(mask, scale);
auto src_mem = memory(src_desc, engine_, src_data);
auto dst_mem = memory(dst_desc, engine_);
auto reorder_pd = mkldnn::reorder::primitive_desc(src_mem, dst_mem, attr);
auto reorder_pd = dnnl::reorder::primitive_desc(src_mem, dst_mem, attr);
auto reorder = mkldnn::reorder(reorder_pd);
auto reorder = dnnl::reorder(reorder_pd);
auto &astream = platform::MKLDNNDeviceContext::tls().get_stream();
{
......@@ -132,9 +131,9 @@ class MulPrimitiveFactory {
scale_y);
}
mkldnn::primitive_attr CreateMulAttr(const ExecutionContext &ctx,
bool force_fp32_output) {
mkldnn::primitive_attr mul_attr;
dnnl::primitive_attr CreateMulAttr(const ExecutionContext &ctx,
bool force_fp32_output) {
dnnl::primitive_attr mul_attr;
auto scale_y_data = ctx.Attr<std::vector<float>>("scale_y");
auto scale_x_data = ctx.Attr<float>("scale_x");
......@@ -185,9 +184,9 @@ class MulPrimitiveFactory {
// Executes the primitive stored in mul_ on the stream obtained from
// MKLDNNDeviceContext::tls(), passing x_input_ as SRC, y_input_ as WEIGHTS
// and output_ as DST, then calls wait() on the stream before returning.
// mul_, x_input_, y_input_ and output_ are dereferenced unconditionally, so
// they must all be populated before Execute() is called.
// NOTE(review): the MKLDNN_ARG_* and DNNL_ARG_* execute statements below are
// the pre-rename and post-rename form of the same single call; the primitive
// is presumably meant to run once — confirm against the actual file.
void Execute() {
auto &astream = platform::MKLDNNDeviceContext::tls().get_stream();
(*mul_).execute(astream, {{MKLDNN_ARG_SRC, *x_input_},
{MKLDNN_ARG_WEIGHTS, *y_input_},
{MKLDNN_ARG_DST, *output_}});
(*mul_).execute(astream, {{DNNL_ARG_SRC, *x_input_},
{DNNL_ARG_WEIGHTS, *y_input_},
{DNNL_ARG_DST, *output_}});
astream.wait();
}
......@@ -268,7 +267,7 @@ class MulPrimitiveFactory {
auto dst_mem = dst_data ? memory(dst_desc, engine_, dst_data)
: memory(dst_desc, engine_);
auto reorder = mkldnn::reorder(src_mem, dst_mem);
auto reorder = dnnl::reorder(src_mem, dst_mem);
auto &astream = platform::MKLDNNDeviceContext::tls().get_stream();
{
......@@ -289,7 +288,7 @@ class MulPrimitiveFactory {
return Reorder(src_desc, dst_desc, to_void_cast<YT>(input_y->data<YT>()));
}
const mkldnn::engine &engine_;
const dnnl::engine &engine_;
paddle::optional<memory> x_input_;
paddle::optional<memory> y_input_;
paddle::optional<memory> output_;
......@@ -303,7 +302,7 @@ template <typename XT, typename YT, typename OT>
std::shared_ptr<MulPrimitiveFactory<XT, YT, OT>> GetPrimitiveFactory(
const MKLDNNDeviceContext &dev_ctx, const ExecutionContext &ctx,
const Tensor *input_x, const Tensor *input_y,
const mkldnn::engine &mkldnn_engine) {
const dnnl::engine &mkldnn_engine) {
std::string key = platform::CreateKey(
dev_ctx, input_x->type(), framework::vectorize(input_x->dims()),
input_y->type(), framework::vectorize(input_y->dims()),
......@@ -327,7 +326,7 @@ inner_product_forward GetMulPrimitive(const MKLDNNDeviceContext &dev_ctx,
const ExecutionContext &ctx,
const Tensor *input_x,
const Tensor *input_y, Tensor *output,
const mkldnn::engine &mkldnn_engine) {
const dnnl::engine &mkldnn_engine) {
constexpr bool is_int8 =
std::is_same<XT, int8_t>::value || std::is_same<XT, uint8_t>::value;
bool force_fp32_output = ctx.Attr<bool>("force_fp32_output");
......
......@@ -20,24 +20,24 @@ namespace paddle {
namespace operators {
using framework::DataLayout;
using mkldnn::memory;
using mkldnn::pooling_backward;
using mkldnn::pooling_forward;
using mkldnn::primitive;
using mkldnn::reorder;
using mkldnn::stream;
using dnnl::memory;
using dnnl::pooling_backward;
using dnnl::pooling_forward;
using dnnl::primitive;
using dnnl::reorder;
using dnnl::stream;
using platform::to_void_cast;
template <typename T>
class PoolingMKLDNNHandler
: public platform::MKLDNNHandlerNoCachingT<T, mkldnn::pooling_forward,
mkldnn::pooling_backward> {
: public platform::MKLDNNHandlerNoCachingT<T, dnnl::pooling_forward,
dnnl::pooling_backward> {
public:
PoolingMKLDNNHandler(const paddle::framework::ExecutionContext& ctx,
const mkldnn::engine mkldnn_engine, const Tensor* input,
const dnnl::engine mkldnn_engine, const Tensor* input,
Tensor* output)
: platform::MKLDNNHandlerNoCachingT<T, mkldnn::pooling_forward,
mkldnn::pooling_backward>(
: platform::MKLDNNHandlerNoCachingT<T, dnnl::pooling_forward,
dnnl::pooling_backward>(
mkldnn_engine, ctx.GetPlace()) {
PADDLE_ENFORCE_EQ(input->layout(), DataLayout::kMKLDNN,
platform::errors::InvalidArgument(
......@@ -98,7 +98,7 @@ class PoolingMKLDNNHandler
const auto exclude_padding = ctx.Attr<bool>("exclusive");
const auto src_md = mkldnn::memory::desc(src_tz, dt, input->format());
const auto src_md = dnnl::memory::desc(src_tz, dt, input->format());
/* create memory descriptor for pooling without specified format
* ('any') which lets a primitive (pooling in this case) choose
* the memory format preferred for best performance
......@@ -119,22 +119,21 @@ class PoolingMKLDNNHandler
ComputeAdaptivePoolParameters(ctx, src_tz, &ksize, &strides);
this->AcquireForwardPrimitiveDescriptor(
is_test ? mkldnn::prop_kind::forward_inference
: mkldnn::prop_kind::forward_training,
is_test ? dnnl::prop_kind::forward_inference
: dnnl::prop_kind::forward_training,
pooling_type == "max"
? mkldnn::algorithm::pooling_max
: (exclude_padding
? mkldnn::algorithm::pooling_avg_exclude_padding
: mkldnn::algorithm::pooling_avg_include_padding),
? dnnl::algorithm::pooling_max
: (exclude_padding ? dnnl::algorithm::pooling_avg_exclude_padding
: dnnl::algorithm::pooling_avg_include_padding),
src_md, dst_md, strides, ksize, mkldnn_paddings[0], mkldnn_paddings[1]);
}
PoolingMKLDNNHandler(const paddle::framework::ExecutionContext& ctx,
const mkldnn::engine mkldnn_engine, const Tensor* in_x,
const dnnl::engine mkldnn_engine, const Tensor* in_x,
const Tensor* out_grad, Tensor* in_x_grad)
: platform::MKLDNNHandlerNoCachingT<T, mkldnn::pooling_forward,
mkldnn::pooling_backward>(
: platform::MKLDNNHandlerNoCachingT<T, dnnl::pooling_forward,
dnnl::pooling_backward>(
mkldnn_engine, ctx.GetPlace()) {
PADDLE_ENFORCE_EQ(
in_x->layout(), DataLayout::kMKLDNN,
......@@ -185,12 +184,11 @@ class PoolingMKLDNNHandler
auto diff_dst_tz = paddle::framework::vectorize<int64_t>(out_grad->dims());
const auto dt = framework::ToMKLDNNDataType(in_x->type());
auto src_md = mkldnn::memory::desc(src_tz, dt, in_x->format());
auto dst_md =
mkldnn::memory::desc(diff_dst_tz, dt, MKLDNNMemoryFormat::any);
auto diff_dst_md = mkldnn::memory::desc(
auto src_md = dnnl::memory::desc(src_tz, dt, in_x->format());
auto dst_md = dnnl::memory::desc(diff_dst_tz, dt, MKLDNNMemoryFormat::any);
auto diff_dst_md = dnnl::memory::desc(
diff_dst_tz, platform::MKLDNNGetDataType<T>(), out_grad->format());
auto diff_src_md = mkldnn::memory::desc(
auto diff_src_md = dnnl::memory::desc(
diff_src_tz, platform::MKLDNNGetDataType<T>(), MKLDNNMemoryFormat::any);
auto mkldnn_paddings = platform::ToMkldnnPadding(paddings);
......@@ -205,44 +203,42 @@ class PoolingMKLDNNHandler
const auto exclude_padding = ctx.Attr<bool>("exclusive");
this->AcquireForwardPrimitiveDescriptor(
mkldnn::prop_kind::forward_training,
dnnl::prop_kind::forward_training,
pooling_type == "max"
? mkldnn::algorithm::pooling_max
: (exclude_padding
? mkldnn::algorithm::pooling_avg_exclude_padding
: mkldnn::algorithm::pooling_avg_include_padding),
? dnnl::algorithm::pooling_max
: (exclude_padding ? dnnl::algorithm::pooling_avg_exclude_padding
: dnnl::algorithm::pooling_avg_include_padding),
src_md, dst_md, strides, ksize, mkldnn_paddings[0], mkldnn_paddings[1]);
this->AcquireBackwardPrimitiveDescriptor(
pooling_type == "max"
? mkldnn::algorithm::pooling_max
: (exclude_padding
? mkldnn::algorithm::pooling_avg_exclude_padding
: mkldnn::algorithm::pooling_avg_include_padding),
? dnnl::algorithm::pooling_max
: (exclude_padding ? dnnl::algorithm::pooling_avg_exclude_padding
: dnnl::algorithm::pooling_avg_include_padding),
diff_src_md, diff_dst_md, strides, ksize, mkldnn_paddings[0],
mkldnn_paddings[1]);
}
std::shared_ptr<mkldnn::memory> AcquireWorkspaceMemory(
std::shared_ptr<dnnl::memory> AcquireWorkspaceMemory(
const platform::MKLDNNDeviceContext& dev_ctx,
const std::string& unique_name) {
mkldnn::memory::desc workspace_md = this->fwd_pd_->workspace_desc();
dnnl::memory::desc workspace_md = this->fwd_pd_->workspace_desc();
// Pooling Workspace has to be passed to Grad op that
// may be executed by a different thread, hence
// for that one we use key that does not contain TID
std::string workspace_key =
platform::CreateKey(dev_ctx, workspace_md.dims(),
workspace_md.data_type(), unique_name, "@wrk");
auto mem_p = std::static_pointer_cast<mkldnn::memory>(
dev_ctx.GetBlob(workspace_key));
auto mem_p =
std::static_pointer_cast<dnnl::memory>(dev_ctx.GetBlob(workspace_key));
if (mem_p == nullptr) {
static std::mutex acquire_barrier;
std::lock_guard<std::mutex> block_threads_until_finish_this_job(
acquire_barrier);
mem_p = std::static_pointer_cast<mkldnn::memory>(
mem_p = std::static_pointer_cast<dnnl::memory>(
dev_ctx.GetBlob(workspace_key));
if (mem_p == nullptr) {
mem_p = std::make_shared<mkldnn::memory>(workspace_md, this->engine_);
mem_p = std::make_shared<dnnl::memory>(workspace_md, this->engine_);
dev_ctx.SetBlob(workspace_key, mem_p);
}
}
......@@ -318,13 +314,13 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
// Training
auto workspace_memory =
handler.AcquireWorkspaceMemory(dev_ctx, ctx.OutputName("Out"));
pool_p->execute(astream, {{MKLDNN_ARG_SRC, *src_memory},
{MKLDNN_ARG_DST, *dst_memory},
{MKLDNN_ARG_WORKSPACE, *workspace_memory}});
pool_p->execute(astream, {{DNNL_ARG_SRC, *src_memory},
{DNNL_ARG_DST, *dst_memory},
{DNNL_ARG_WORKSPACE, *workspace_memory}});
} else {
// Inference
pool_p->execute(astream, {{MKLDNN_ARG_SRC, *src_memory},
{MKLDNN_ARG_DST, *dst_memory}});
pool_p->execute(
astream, {{DNNL_ARG_SRC, *src_memory}, {DNNL_ARG_DST, *dst_memory}});
}
astream.wait();
......@@ -360,13 +356,13 @@ class PoolMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
// Max - pooling needs Workspace
auto workspace_memory =
handler.AcquireWorkspaceMemory(dev_ctx, ctx.InputName("Out"));
pool_bwd_p->execute(astream, {{MKLDNN_ARG_DIFF_SRC, *diff_src_memory},
{MKLDNN_ARG_DIFF_DST, *diff_dst_memory},
{MKLDNN_ARG_WORKSPACE, *workspace_memory}});
pool_bwd_p->execute(astream, {{DNNL_ARG_DIFF_SRC, *diff_src_memory},
{DNNL_ARG_DIFF_DST, *diff_dst_memory},
{DNNL_ARG_WORKSPACE, *workspace_memory}});
} else {
// Average Pooling
pool_bwd_p->execute(astream, {{MKLDNN_ARG_DIFF_SRC, *diff_src_memory},
{MKLDNN_ARG_DIFF_DST, *diff_dst_memory}});
pool_bwd_p->execute(astream, {{DNNL_ARG_DIFF_SRC, *diff_src_memory},
{DNNL_ARG_DIFF_DST, *diff_dst_memory}});
}
astream.wait();
......
......@@ -37,8 +37,7 @@ class ScaleMKLDNNKernel : public framework::OpKernel<T> {
bool is_inplaced = x->IsSharedBufferWith(*out);
platform::ActivationMKLDNNHandler<T> handler(
mkldnn::algorithm::eltwise_linear, ctx, mkldnn_engine, ctx.GetPlace(),
x);
dnnl::algorithm::eltwise_linear, ctx, mkldnn_engine, ctx.GetPlace(), x);
auto src_memory_p = handler.AcquireSrcMemory(x);
std::shared_ptr<dnnl::memory> dst_memory_p = nullptr;
......@@ -51,8 +50,8 @@ class ScaleMKLDNNKernel : public framework::OpKernel<T> {
auto activation_p = handler.AcquireForwardPrimitive();
auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
activation_p->execute(astream, {{MKLDNN_ARG_FROM, *src_memory_p},
{MKLDNN_ARG_TO, *dst_memory_p}});
activation_p->execute(astream, {{DNNL_ARG_FROM, *src_memory_p},
{DNNL_ARG_TO, *dst_memory_p}});
astream.wait();
out->set_layout(framework::DataLayout::kMKLDNN);
......
......@@ -32,15 +32,15 @@ using platform::to_void_cast;
template <typename T>
class SoftmaxMKLDNNHandler
: public platform::MKLDNNHandlerNoCachingT<T, mkldnn::softmax_forward,
mkldnn::softmax_backward> {
: public platform::MKLDNNHandlerNoCachingT<T, dnnl::softmax_forward,
dnnl::softmax_backward> {
public:
SoftmaxMKLDNNHandler(const mkldnn::engine mkldnn_engine,
SoftmaxMKLDNNHandler(const dnnl::engine mkldnn_engine,
platform::Place cpu_place, const Tensor* input,
Tensor* output, const int axis)
: platform::MKLDNNHandlerNoCachingT<T, mkldnn::softmax_forward,
mkldnn::softmax_backward>(
mkldnn_engine, cpu_place) {
: platform::MKLDNNHandlerNoCachingT<T, dnnl::softmax_forward,
dnnl::softmax_backward>(mkldnn_engine,
cpu_place) {
PADDLE_ENFORCE_EQ(
input->dims(), output->dims(),
platform::errors::InvalidArgument(
......@@ -55,13 +55,13 @@ class SoftmaxMKLDNNHandler
}
SoftmaxMKLDNNHandler(const framework::ExecutionContext& ctx,
const mkldnn::engine mkldnn_engine,
const dnnl::engine mkldnn_engine,
platform::Place cpu_place, const Tensor* out,
const Tensor* out_grad, Tensor* in_x_grad,
const std::string& unique_name)
: platform::MKLDNNHandlerNoCachingT<T, mkldnn::softmax_forward,
mkldnn::softmax_backward>(
mkldnn_engine, cpu_place) {
: platform::MKLDNNHandlerNoCachingT<T, dnnl::softmax_forward,
dnnl::softmax_backward>(mkldnn_engine,
cpu_place) {
PADDLE_ENFORCE_EQ(out_grad->dims(), in_x_grad->dims(),
platform::errors::InvalidArgument(
"The shape of softmax_grad's input "
......@@ -154,10 +154,9 @@ class SoftmaxMKLDNNGradKernel : public paddle::framework::OpKernel<T> {
auto softmax_bwd_p = handler.AcquireBackwardPrimitive();
auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
softmax_bwd_p->execute(astream,
{{MKLDNN_ARG_DST, *dst_memory_p},
{MKLDNN_ARG_DIFF_DST, *diff_dst_memory_p},
{MKLDNN_ARG_DIFF_SRC, *diff_src_memory_p}});
softmax_bwd_p->execute(astream, {{DNNL_ARG_DST, *dst_memory_p},
{DNNL_ARG_DIFF_DST, *diff_dst_memory_p},
{DNNL_ARG_DIFF_SRC, *diff_src_memory_p}});
astream.wait();
in_x_grad->set_layout(framework::DataLayout::kMKLDNN);
......
......@@ -20,10 +20,10 @@ namespace operators {
using framework::DataLayout;
using framework::Tensor;
using framework::LoDTensor;
using mkldnn::memory;
using mkldnn::primitive;
using mkldnn::concat;
using mkldnn::stream;
using dnnl::memory;
using dnnl::primitive;
using dnnl::concat;
using dnnl::stream;
using platform::to_void_cast;
template <typename T>
......@@ -31,7 +31,7 @@ class StackMKLDNNHandler
: public platform::MKLDNNHandlerNoCachingT<T, dnnl::concat> {
public:
StackMKLDNNHandler(const framework::ExecutionContext& ctx,
const mkldnn::engine mkldnn_engine,
const dnnl::engine mkldnn_engine,
const std::vector<const Tensor*>& inputs, Tensor* output)
: platform::MKLDNNHandlerNoCachingT<T, dnnl::concat>(mkldnn_engine,
ctx.GetPlace()) {
......@@ -91,7 +91,7 @@ class StackMKLDNNHandler
dst_md, stack_axis, srcs_md, this->engine_));
}
std::shared_ptr<mkldnn::memory> AcquireSrcMemory(const Tensor& input, int i) {
std::shared_ptr<dnnl::memory> AcquireSrcMemory(const Tensor& input, int i) {
const T* input_data = input.data<T>();
return this->AcquireMemoryFromPrimitive(this->fwd_pd_->src_desc(i),
to_void_cast<T>(input_data));
......@@ -122,9 +122,9 @@ class StackMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
std::unordered_map<int, memory> args;
for (size_t i = 0; i < multi_input.size(); ++i) {
srcs.push_back(handler.AcquireSrcMemory(*(multi_input[i]), i));
args.insert({MKLDNN_ARG_MULTIPLE_SRC + i, *(srcs.at(i))});
args.insert({DNNL_ARG_MULTIPLE_SRC + i, *(srcs.at(i))});
}
args.insert({MKLDNN_ARG_DST, *dst_mem});
args.insert({DNNL_ARG_DST, *dst_mem});
concat_p->execute(astream, args);
astream.wait();
......
......@@ -48,7 +48,7 @@ template <typename T>
class SumMKLDNNHandler
: public platform::MKLDNNHandlerNoCachingT<T, dnnl::sum> {
public:
SumMKLDNNHandler(mkldnn::engine engine, platform::Place cpu_place,
SumMKLDNNHandler(dnnl::engine engine, platform::Place cpu_place,
const std::vector<framework::Variable*>& in_vars,
framework::LoDTensor* z)
......@@ -57,21 +57,21 @@ class SumMKLDNNHandler
auto dst_tz = framework::vectorize<int64_t>(z->dims());
auto src_tz = dst_tz;
std::vector<mkldnn::memory::desc> srcs_md;
std::vector<dnnl::memory::desc> srcs_md;
for (size_t i = 0; i < in_vars.size(); i++) {
auto& input_it = in_vars[i]->Get<framework::LoDTensor>();
if (input_it.numel() == 0) {
continue;
}
MKLDNNMemoryFormat input_format = input_it.format();
srcs_md.push_back(mkldnn::memory::desc(
srcs_md.push_back(dnnl::memory::desc(
src_tz, platform::MKLDNNGetDataType<T>(), input_format));
++num_inputs_;
}
std::vector<float> scales(num_inputs_, 1.0);
auto dst_md = mkldnn::memory::desc(dst_tz, platform::MKLDNNGetDataType<T>(),
MKLDNNMemoryFormat::any);
auto dst_md = dnnl::memory::desc(dst_tz, platform::MKLDNNGetDataType<T>(),
MKLDNNMemoryFormat::any);
this->AcquireForwardPrimitiveDescriptor(dst_md, scales, srcs_md);
}
......@@ -79,14 +79,14 @@ class SumMKLDNNHandler
// (jczaja) The oneDNN sum primitive has no .desc attribute, so the base
// AcquireForwardPrimitiveDescriptor cannot be used.
//
// Builds a dnnl::sum::primitive_desc directly from the destination
// descriptor, the scales and the source descriptors on this->engine_, and
// stores it in this->fwd_pd_ via reset(), replacing whatever it held before.
// NOTE(review): presumably `scales` and `srcs_md` must have the same number
// of entries — confirm against the oneDNN sum primitive documentation.
// NOTE(review): this listing shows both the pre-rename (mkldnn::) and the
// post-rename (dnnl::) parameter lines of the same function.
void AcquireForwardPrimitiveDescriptor(
const mkldnn::memory::desc& dst_md, const std::vector<float>& scales,
const std::vector<mkldnn::memory::desc>& srcs_md) {
const dnnl::memory::desc& dst_md, const std::vector<float>& scales,
const std::vector<dnnl::memory::desc>& srcs_md) {
this->fwd_pd_.reset(
new dnnl::sum::primitive_desc(dst_md, scales, srcs_md, this->engine_));
}
std::shared_ptr<mkldnn::memory> AcquireSrcMemory(
const framework::Tensor& input, int i) {
std::shared_ptr<dnnl::memory> AcquireSrcMemory(const framework::Tensor& input,
int i) {
const T* input_data = input.data<T>();
return this->AcquireMemoryFromPrimitive(this->fwd_pd_->src_desc(i),
to_void_cast<T>(input_data));
......@@ -94,7 +94,7 @@ class SumMKLDNNHandler
using platform::MKLDNNHandlerNoCachingT<T, dnnl::sum>::AcquireDstMemory;
// Acquires the destination memory by passing the destination descriptor
// reported by fwd_pd_ to AcquireMemoryFromPrimitive, and returns the result.
//
// Diff view: the mkldnn:: line is the pre-change signature and the dnnl::
// line is its replacement.
std::shared_ptr<mkldnn::memory> AcquireDstMemory(void) {
std::shared_ptr<dnnl::memory> AcquireDstMemory(void) {
return this->AcquireMemoryFromPrimitive(this->fwd_pd_->dst_desc());
}
......@@ -125,7 +125,7 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
SumMKLDNNHandler<T> handler(mkldnn_engine, ctx.GetPlace(), in_vars, output);
// Create list of SRC MEMs
std::vector<std::shared_ptr<mkldnn::memory>> srcs_mem;
std::vector<std::shared_ptr<dnnl::memory>> srcs_mem;
srcs_mem.reserve(handler.GetNumInputs());
int input_index = 0;
for (size_t i = 0; i < in_vars.size(); i++) {
......@@ -147,11 +147,11 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
auto sum_p = handler.AcquireForwardPrimitive();
std::unordered_map<int, mkldnn::memory> args;
std::unordered_map<int, dnnl::memory> args;
for (size_t i = 0; i < srcs_mem.size(); ++i) {
args.insert({MKLDNN_ARG_MULTIPLE_SRC + i, *(srcs_mem[i])});
args.insert({DNNL_ARG_MULTIPLE_SRC + i, *(srcs_mem[i])});
}
args.insert({MKLDNN_ARG_DST, *dst_mem});
args.insert({DNNL_ARG_DST, *dst_mem});
auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
sum_p->execute(astream, args);
......
......@@ -589,7 +589,7 @@ MKLDNNDeviceContext::MKLDNNDeviceContext(CPUPlace place)
}
MKLDNNDeviceContextThreadLocals::Body::Body()
: cur_engine(mkldnn::engine::kind::cpu, 0), cur_stream(cur_engine) {
: cur_engine(dnnl::engine::kind::cpu, 0), cur_stream(cur_engine) {
cur_mkldnn_session_id = kMKLDNNSessionID_Default;
cur_input_shape_str = "";
cur_input_shape_cache_capacity = 1;
......@@ -647,11 +647,11 @@ void MKLDNNDeviceContextThreadLocals::Body::log_lib_version(void) {
}
}
// Returns a const reference to the cur_engine member of this Body.
//
// Diff view: the mkldnn:: line is the pre-change signature and the dnnl::
// line is its replacement.
const mkldnn::engine& MKLDNNDeviceContextThreadLocals::Body::get_engine(void) {
const dnnl::engine& MKLDNNDeviceContextThreadLocals::Body::get_engine(void) {
return cur_engine;
}
// Returns a mutable reference to the cur_stream member of this Body.
//
// Diff view: the mkldnn:: line is the pre-change signature and the dnnl::
// line is its replacement.
mkldnn::stream& MKLDNNDeviceContextThreadLocals::Body::get_stream(void) {
dnnl::stream& MKLDNNDeviceContextThreadLocals::Body::get_stream(void) {
return cur_stream;
}
......
......@@ -46,8 +46,9 @@ limitations under the License. */
#endif
#ifdef PADDLE_WITH_MKLDNN
#include "mkldnn.hpp"
#include "dnnl.hpp"
#include "paddle/fluid/framework/data_layout.h"
namespace mkldnn = dnnl;
#endif
#include <map>
......@@ -63,6 +64,10 @@ limitations under the License. */
#endif
#include "unsupported/Eigen/CXX11/Tensor"
// This alias is required for now so that namespace name changes can be made to
// fewer than 20 files at a time. After all the names are changed it will be
// removed.
namespace Eigen {
struct DefaultDevice;
struct GpuDevice;
......@@ -706,8 +711,8 @@ class MKLDNNDeviceContextThreadLocals {
// know for converting MKL-DNN Tensor to non MKL-DNN
paddle::framework::DataLayout cur_paddle_data_layout;
// MKL-DNN stream used for execution of primitives (per-thread)
mkldnn::engine cur_engine;
mkldnn::stream cur_stream;
dnnl::engine cur_engine;
dnnl::stream cur_stream;
std::string key_suffix; // Key identifying current Executor
bool key_attach_thread_id = true;
void* exec_ptr_ = nullptr;
......@@ -721,8 +726,8 @@ class MKLDNNDeviceContextThreadLocals {
void set_cur_paddle_data_layout(framework::DataLayout dl);
framework::DataLayout get_cur_paddle_data_layout(void);
void log_lib_version(void);
const mkldnn::engine& get_engine(void);
mkldnn::stream& get_stream(void);
const dnnl::engine& get_engine(void);
dnnl::stream& get_stream(void);
void set_key_suffix(const std::string& suffix) { key_suffix = suffix; }
const std::string& get_key_suffix(void) const { return key_suffix; }
void disable_tid_in_key(void) { key_attach_thread_id = false; }
......@@ -776,7 +781,7 @@ class MKLDNNDeviceContext : public CPUDeviceContext {
explicit MKLDNNDeviceContext(CPUPlace place);
/* \brief Get the active engine */
const mkldnn::engine& GetEngine() const { return tls().get_engine(); }
const dnnl::engine& GetEngine() const { return tls().get_engine(); }
// Register object to currently used executor's map
void LinkEntryWithExecutor(BlobPtr_t<KeyBlob>, KeyBlob::iterator) const;
......
......@@ -334,34 +334,34 @@ inline dnnl::memory::format_tag GetMKLDNNFormat(const dnnl::memory memory) {
return GetMKLDNNFormat(mem_desc);
}
inline mkldnn::memory::format_tag GetPlainMKLDNNFormat(int tensor_rank) {
inline dnnl::memory::format_tag GetPlainMKLDNNFormat(int tensor_rank) {
switch (tensor_rank) {
case 1:
return mkldnn::memory::format_tag::a;
return dnnl::memory::format_tag::a;
break;
case 2:
return mkldnn::memory::format_tag::ab;
return dnnl::memory::format_tag::ab;
break;
case 3:
return mkldnn::memory::format_tag::abc;
return dnnl::memory::format_tag::abc;
break;
case 4:
return mkldnn::memory::format_tag::abcd;
return dnnl::memory::format_tag::abcd;
break;
case 5:
return mkldnn::memory::format_tag::abcde;
return dnnl::memory::format_tag::abcde;
break;
case 6:
return mkldnn::memory::format_tag::abcdef;
return dnnl::memory::format_tag::abcdef;
break;
case 7:
return mkldnn::memory::format_tag::abcdefg;
return dnnl::memory::format_tag::abcdefg;
break;
case 8:
return mkldnn::memory::format_tag::abcdefgh;
return dnnl::memory::format_tag::abcdefgh;
break;
case 9:
return mkldnn::memory::format_tag::abcdefghi;
return dnnl::memory::format_tag::abcdefghi;
break;
default:
PADDLE_THROW(platform::errors::Unimplemented(
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册