Commit 9908d3cf authored by mozga-intel

MKLDNN layout: Support for convolution operator

Parent b7c683b8
@@ -18,6 +18,17 @@
namespace paddle {
namespace operators {
using conv_bwd_data = mkldnn::convolution_backward_data;
using conv_bwd_weights = mkldnn::convolution_backward_weights;
using conv_fwd = mkldnn::convolution_forward;
using framework::DataLayout;
using mkldnn::memory;
using mkldnn::primitive;
using mkldnn::reorder;
using mkldnn::stream;
using platform::to_void_cast;
using platform::GetMKLDNNFormat;
template <typename T>
class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
public:
@@ -25,6 +36,10 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()),
"It must use CPUPlace.");
// Get a unique key from the "Output" variable name; it indexes blobs in the device context
const std::string key = ctx.op().Output("Output");
const std::string key_conv_pd = key + "@conv_pd";
auto& dev_ctx =
ctx.template device_context<paddle::platform::MKLDNNDeviceContext>();
const auto& mkldnn_engine = dev_ctx.GetEngine();
@@ -33,10 +48,12 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
auto* filter = ctx.Input<Tensor>("Filter");
auto* output = ctx.Output<Tensor>("Output");
// Get a unique name from the "argument" name of the "Output" variable
// This name will be used as the key when saving info into the device context
const std::string key = ctx.op().Output("Output");
const std::string key_conv_pd = key + "@conv_pd";
PADDLE_ENFORCE(input->layout() == DataLayout::kMKLDNN &&
input->format() != memory::format::format_undef,
"Wrong layout/format set for Input tensor");
PADDLE_ENFORCE(filter->layout() == DataLayout::kMKLDNN &&
filter->format() != memory::format::format_undef,
"Wrong layout/format set for Filter tensor");
std::vector<int> strides = ctx.Attr<std::vector<int>>("strides");
std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
@@ -63,60 +80,86 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
paddle::framework::vectorize2int(filter->dims());
std::vector<int> dst_tz = paddle::framework::vectorize2int(output->dims());
// TODO(pzelazko-intel): support more formats
auto src_md = platform::MKLDNNMemDesc(
src_tz, mkldnn::memory::data_type::f32, mkldnn::memory::format::nchw);
auto weights_md =
platform::MKLDNNMemDesc(weights_tz, mkldnn::memory::data_type::f32,
mkldnn::memory::format::oihw);
auto dst_md = platform::MKLDNNMemDesc(
dst_tz, mkldnn::memory::data_type::f32, mkldnn::memory::format::nchw);
auto src_memory =
mkldnn::memory({src_md, mkldnn_engine},
reinterpret_cast<void*>(const_cast<T*>(input_data)));
auto weights_memory =
mkldnn::memory({weights_md, mkldnn_engine},
reinterpret_cast<void*>(const_cast<T*>(filter_data)));
auto dst_memory = mkldnn::memory({dst_md, mkldnn_engine}, output_data);
std::shared_ptr<mkldnn::convolution_forward::primitive_desc> conv_pd =
ConvFwdPrimitiveDesc(src_md, weights_md, dst_md, strides, paddings,
mkldnn_engine);
// save conv_pd into the global device context so it can be referred to in the backward path
dev_ctx.SetBlob(key_conv_pd, conv_pd);
// create mkldnn memory from input tensors (data/weights)
auto user_src_memory = memory(
{{{src_tz}, memory::data_type::f32, input->format()}, mkldnn_engine},
to_void_cast(input_data));
auto user_weights_memory =
memory({{{weights_tz}, memory::data_type::f32, filter->format()},
mkldnn_engine},
to_void_cast(filter_data));
/* create memory descriptor for convolution without specified format
* ('any') which lets a primitive (convolution in this case) choose
* the memory format preferred for best performance
*/
auto src_md = platform::MKLDNNMemDesc(src_tz, memory::data_type::f32,
memory::format::any);
auto weights_md = platform::MKLDNNMemDesc(
weights_tz, memory::data_type::f32, memory::format::any);
auto dst_md = platform::MKLDNNMemDesc(dst_tz, memory::data_type::f32,
memory::format::any);
// create a conv primitive descriptor and save it for use in the backward pass
std::shared_ptr<conv_fwd::primitive_desc> conv_pd = ConvFwdPrimitiveDesc(
src_md, weights_md, dst_md, strides, paddings, mkldnn_engine);
// create reorder primitive if the input format is not the preferred one
auto src_memory = user_src_memory;
primitive reorder_src;
bool is_src_reordered = false;
if (memory::primitive_desc(conv_pd->src_primitive_desc()) !=
user_src_memory.get_primitive_desc()) {
src_memory = memory(conv_pd->src_primitive_desc());
reorder_src = reorder(user_src_memory, src_memory);
is_src_reordered = true;
}
auto weights_memory = user_weights_memory;
primitive reorder_weights;
bool is_weights_reordered = false;
if (memory::primitive_desc(conv_pd->weights_primitive_desc()) !=
user_weights_memory.get_primitive_desc()) {
weights_memory = memory(conv_pd->weights_primitive_desc());
reorder_weights = reorder(user_weights_memory, weights_memory);
is_weights_reordered = true;
}
// create memory primitive for conv dst
auto dst_memory = memory(conv_pd->dst_primitive_desc(), output_data);
// create convolution op primitive
auto conv_prim = mkldnn::convolution_forward(*conv_pd, src_memory,
weights_memory, dst_memory);
auto conv_prim = conv_fwd(*conv_pd, src_memory, weights_memory, dst_memory);
// push primitive to stream and wait until it's executed
std::vector<mkldnn::primitive> pipeline{conv_prim};
mkldnn::stream(mkldnn::stream::kind::eager).submit(pipeline).wait();
std::vector<primitive> pipeline;
if (is_src_reordered) pipeline.push_back(reorder_src);
if (is_weights_reordered) pipeline.push_back(reorder_weights);
pipeline.push_back(conv_prim);
stream(stream::kind::eager).submit(pipeline).wait();
// Save conv_pd in the device context so the backward pass can retrieve it
dev_ctx.SetBlob(key_conv_pd, conv_pd);
output->set_layout(DataLayout::kMKLDNN);
output->set_format(GetMKLDNNFormat(dst_memory));
}
private:
std::unique_ptr<mkldnn::convolution_forward::primitive_desc>
ConvFwdPrimitiveDesc(const mkldnn::memory::desc& src,
const mkldnn::memory::desc& weights,
const mkldnn::memory::desc& dst,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const mkldnn::engine& engine) const {
mkldnn::memory::dims stride_dims = {strides[0], strides[1]};
mkldnn::memory::dims padding_dims = {paddings[0], paddings[1]};
auto conv_desc = mkldnn::convolution_forward::desc(
mkldnn::prop_kind::forward, mkldnn::convolution_direct, src, weights,
dst, stride_dims, padding_dims, padding_dims,
mkldnn::padding_kind::zero);
auto p_conv_pd =
new mkldnn::convolution_forward::primitive_desc(conv_desc, engine);
return std::unique_ptr<mkldnn::convolution_forward::primitive_desc>(
p_conv_pd);
std::unique_ptr<conv_fwd::primitive_desc> ConvFwdPrimitiveDesc(
const memory::desc& src, const memory::desc& weights,
const memory::desc& dst, const std::vector<int>& strides,
const std::vector<int>& paddings, const mkldnn::engine& engine) const {
memory::dims stride_dims = {strides[0], strides[1]};
memory::dims padding_dims = {paddings[0], paddings[1]};
auto conv_desc =
conv_fwd::desc(mkldnn::prop_kind::forward, mkldnn::convolution_direct,
src, weights, dst, stride_dims, padding_dims,
padding_dims, mkldnn::padding_kind::zero);
auto p_conv_pd = new conv_fwd::primitive_desc(conv_desc, engine);
return std::unique_ptr<conv_fwd::primitive_desc>(p_conv_pd);
}
};
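For reference, here is a minimal standalone sketch of the `format::any`-plus-reorder idiom the forward kernel above relies on, written against the MKL-DNN 0.x C++ API (`mkldnn.hpp`). The shapes, strides, and paddings are made up for illustration; everything else mirrors the pattern in `Compute()` above.

```cpp
// Sketch of the format::any + reorder idiom (MKL-DNN 0.x API).
#include <vector>
#include <mkldnn.hpp>

using namespace mkldnn;

int main() {
  engine cpu_engine(engine::cpu, 0);

  // Illustrative shapes: NCHW input, OIHW weights, 3x3 kernel, padding 1.
  memory::dims src_tz = {1, 3, 8, 8};
  memory::dims weights_tz = {16, 3, 3, 3};
  memory::dims dst_tz = {1, 16, 8, 8};
  memory::dims strides = {1, 1};
  memory::dims padding = {1, 1};
  std::vector<float> src(1 * 3 * 8 * 8), weights(16 * 3 * 3 * 3),
      dst(1 * 16 * 8 * 8);

  // User memory in fixed, framework-defined formats (what the tensor holds).
  auto user_src = memory(
      {{{src_tz}, memory::data_type::f32, memory::format::nchw}, cpu_engine},
      src.data());
  auto user_weights = memory(
      {{{weights_tz}, memory::data_type::f32, memory::format::oihw},
       cpu_engine},
      weights.data());

  // format::any lets the convolution pick its preferred layout.
  auto src_md = memory::desc({src_tz}, memory::data_type::f32,
                             memory::format::any);
  auto weights_md = memory::desc({weights_tz}, memory::data_type::f32,
                                 memory::format::any);
  auto dst_md = memory::desc({dst_tz}, memory::data_type::f32,
                             memory::format::any);

  auto conv_desc = convolution_forward::desc(
      prop_kind::forward, convolution_direct, src_md, weights_md, dst_md,
      strides, padding, padding, padding_kind::zero);
  auto conv_pd = convolution_forward::primitive_desc(conv_desc, cpu_engine);

  // Reorder user data only when the chosen layout differs from the user one.
  std::vector<primitive> pipeline;
  auto src_mem = user_src;
  if (memory::primitive_desc(conv_pd.src_primitive_desc()) !=
      user_src.get_primitive_desc()) {
    src_mem = memory(conv_pd.src_primitive_desc());
    pipeline.push_back(reorder(user_src, src_mem));
  }
  auto weights_mem = user_weights;
  if (memory::primitive_desc(conv_pd.weights_primitive_desc()) !=
      user_weights.get_primitive_desc()) {
    weights_mem = memory(conv_pd.weights_primitive_desc());
    pipeline.push_back(reorder(user_weights, weights_mem));
  }
  auto dst_mem = memory(conv_pd.dst_primitive_desc(), dst.data());

  pipeline.push_back(
      convolution_forward(conv_pd, src_mem, weights_mem, dst_mem));
  stream(stream::kind::eager).submit(pipeline).wait();
  return 0;
}
```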
@@ -139,6 +182,19 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
Tensor* input_grad = ctx.Output<Tensor>(framework::GradVarName("Input"));
Tensor* filter_grad = ctx.Output<Tensor>(framework::GradVarName("Filter"));
PADDLE_ENFORCE(input->layout() == DataLayout::kMKLDNN &&
input->format() != memory::format::format_undef,
"Wrong layout/format set for Input tensor");
PADDLE_ENFORCE(filter->layout() == DataLayout::kMKLDNN &&
filter->format() != memory::format::format_undef,
"Wrong layout/format set for Filter tensor");
PADDLE_ENFORCE(output->layout() == DataLayout::kMKLDNN &&
output->format() != memory::format::format_undef,
"Wrong layout/format set for Output tensor");
PADDLE_ENFORCE(output_grad->layout() == DataLayout::kMKLDNN &&
output_grad->format() != memory::format::format_undef,
"Wrong layout/format set for output_grad tensor");
if (!input_grad && !filter_grad) return;
// Get a unique name from the "argument" name of the "Output" variable
@@ -167,108 +223,147 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
paddle::framework::vectorize2int(filter->dims());
std::vector<int> dst_tz = paddle::framework::vectorize2int(output->dims());
// TODO(pzelazko-intel): support more formats
auto src_md = platform::MKLDNNMemDesc(
src_tz, mkldnn::memory::data_type::f32, mkldnn::memory::format::nchw);
auto diff_src_md = platform::MKLDNNMemDesc(
src_tz, mkldnn::memory::data_type::f32, mkldnn::memory::format::nchw);
auto weights_md =
platform::MKLDNNMemDesc(weights_tz, mkldnn::memory::data_type::f32,
mkldnn::memory::format::oihw);
auto diff_weights_md =
platform::MKLDNNMemDesc(weights_tz, mkldnn::memory::data_type::f32,
mkldnn::memory::format::oihw);
auto diff_dst_md = platform::MKLDNNMemDesc(
dst_tz, mkldnn::memory::data_type::f32, mkldnn::memory::format::nchw);
// create memory
auto diff_dst_memory = mkldnn::memory(
{diff_weights_md, mkldnn_engine},
reinterpret_cast<void*>(const_cast<T*>(output_grad_data)));
// create mkldnn memory from input tensors (input/weights/output_grad)
auto user_src_memory = memory(
{{{src_tz}, memory::data_type::f32, input->format()}, mkldnn_engine},
to_void_cast(input_data));
auto user_weights_memory =
memory({{{weights_tz}, memory::data_type::f32, filter->format()},
mkldnn_engine},
to_void_cast(filter_data));
auto user_diff_dst_memory =
memory({{{dst_tz}, memory::data_type::f32, output_grad->format()},
mkldnn_engine},
to_void_cast(output_grad_data));
/* create memory descriptor for conv backward without specified format
* ('any') which lets a primitive (conv backward in this case) choose
* the memory format preferred for best performance
*/
auto src_md = platform::MKLDNNMemDesc(src_tz, memory::data_type::f32,
memory::format::any);
auto diff_src_md = platform::MKLDNNMemDesc(src_tz, memory::data_type::f32,
memory::format::any);
auto weights_md = platform::MKLDNNMemDesc(
weights_tz, memory::data_type::f32, memory::format::any);
auto diff_weights_md = platform::MKLDNNMemDesc(
weights_tz, memory::data_type::f32, memory::format::any);
auto diff_dst_md = platform::MKLDNNMemDesc(dst_tz, memory::data_type::f32,
memory::format::any);
// Retrieve conv_pd from device context
auto conv_pd =
std::static_pointer_cast<mkldnn::convolution_forward::primitive_desc>(
dev_ctx.GetBlob(key_conv_pd));
auto conv_pd = std::static_pointer_cast<conv_fwd::primitive_desc>(
dev_ctx.GetBlob(key_conv_pd));
PADDLE_ENFORCE(conv_pd != nullptr,
"Fail to find conv_pd in device context");
// create backward conv primitive for weights
if (filter_grad) {
// create primitive descriptor
mkldnn::convolution_backward_weights::primitive_desc conv_bwd_weights_pd =
ConvBwdWeightsPrimitiveDesc(src_md, diff_weights_md, diff_dst_md,
strides, paddings, *conv_pd,
mkldnn_engine);
// create memory
// create backward convolution primitive descriptor
auto conv_bwd_weights_desc = conv_bwd_weights::desc(
mkldnn::convolution_direct, src_md, diff_weights_md, diff_dst_md,
strides, paddings, paddings, mkldnn::padding_kind::zero);
auto conv_bwd_weights_pd = conv_bwd_weights::primitive_desc(
conv_bwd_weights_desc, mkldnn_engine, *conv_pd);
// create reorder primitive if the input format is not the preferred one
auto src_memory = user_src_memory;
primitive reorder_src;
bool is_src_reordered = false;
if (memory::primitive_desc(conv_bwd_weights_pd.src_primitive_desc()) !=
user_src_memory.get_primitive_desc()) {
src_memory = memory(conv_bwd_weights_pd.src_primitive_desc());
reorder_src = reorder(user_src_memory, src_memory);
is_src_reordered = true;
}
auto diff_dst_memory_4filter = user_diff_dst_memory;
primitive reorder_diff_dst_4filter;
bool is_diff_dst_reordered_4filter = false;
if (memory::primitive_desc(
conv_bwd_weights_pd.diff_dst_primitive_desc()) !=
user_diff_dst_memory.get_primitive_desc()) {
diff_dst_memory_4filter =
memory(conv_bwd_weights_pd.diff_dst_primitive_desc());
reorder_diff_dst_4filter =
reorder(user_diff_dst_memory, diff_dst_memory_4filter);
is_diff_dst_reordered_4filter = true;
}
// create mkldnn memory for output (i.e. diff weights)
auto diff_weights_memory =
mkldnn::memory({diff_weights_md, mkldnn_engine},
reinterpret_cast<void*>(filter_grad_data));
auto src_memory =
mkldnn::memory({src_md, mkldnn_engine},
reinterpret_cast<void*>(const_cast<T*>(input_data)));
memory(conv_bwd_weights_pd.diff_weights_primitive_desc(),
reinterpret_cast<void*>(filter_grad_data));
// create backward conv primitive for weights
auto conv_bwd_weights_prim = mkldnn::convolution_backward_weights(
conv_bwd_weights_pd, src_memory, diff_dst_memory,
diff_weights_memory);
auto conv_bwd_weights_prim =
conv_bwd_weights(conv_bwd_weights_pd, src_memory,
diff_dst_memory_4filter, diff_weights_memory);
// push primitive and execute it
std::vector<mkldnn::primitive> pipeline{conv_bwd_weights_prim};
mkldnn::stream(mkldnn::stream::kind::eager).submit(pipeline).wait();
std::vector<primitive> pipeline;
if (is_src_reordered) pipeline.push_back(reorder_src);
if (is_diff_dst_reordered_4filter)
pipeline.push_back(reorder_diff_dst_4filter);
pipeline.push_back(conv_bwd_weights_prim);
stream(stream::kind::eager).submit(pipeline).wait();
filter_grad->set_layout(DataLayout::kMKLDNN);
filter_grad->set_format(GetMKLDNNFormat(diff_weights_memory));
}
if (input_grad) {
// create primitive descriptor
mkldnn::convolution_backward_data::primitive_desc conv_bwd_data_pd =
ConvBwdDataPrimitiveDesc(diff_src_md, weights_md, diff_dst_md,
strides, paddings, *conv_pd, mkldnn_engine);
// create memory
auto diff_src_memory = mkldnn::memory(
{diff_src_md, mkldnn_engine},
reinterpret_cast<void*>(const_cast<T*>(input_grad_data)));
auto weights_memory =
mkldnn::memory({weights_md, mkldnn_engine},
reinterpret_cast<void*>(const_cast<T*>(filter_data)));
// create backward convolution primitive descriptor
auto conv_bwd_data_desc = conv_bwd_data::desc(
mkldnn::convolution_direct, diff_src_md, weights_md, diff_dst_md,
strides, paddings, paddings, mkldnn::padding_kind::zero);
auto conv_bwd_data_pd = conv_bwd_data::primitive_desc(
conv_bwd_data_desc, mkldnn_engine, *conv_pd);
// create reorder primitive if the input format is not the preferred one
auto weights_memory = user_weights_memory;
primitive reorder_weights;
bool is_weights_reordered = false;
if (memory::primitive_desc(conv_bwd_data_pd.weights_primitive_desc()) !=
user_weights_memory.get_primitive_desc()) {
weights_memory = memory(conv_bwd_data_pd.weights_primitive_desc());
reorder_weights = reorder(user_weights_memory, weights_memory);
is_weights_reordered = true;
}
auto diff_dst_memory_4data = user_diff_dst_memory;
primitive reorder_diff_dst_4data;
bool is_diff_dst_reordered_4data = false;
if (memory::primitive_desc(conv_bwd_data_pd.diff_dst_primitive_desc()) !=
user_diff_dst_memory.get_primitive_desc()) {
diff_dst_memory_4data =
memory(conv_bwd_data_pd.diff_dst_primitive_desc());
reorder_diff_dst_4data =
reorder(user_diff_dst_memory, diff_dst_memory_4data);
is_diff_dst_reordered_4data = true;
}
// create mkldnn memory for output (i.e. diff src)
auto diff_src_memory = memory(conv_bwd_data_pd.diff_src_primitive_desc(),
reinterpret_cast<void*>(input_grad_data));
// create backward conv primitive for data
auto conv_bwd_data_prim = mkldnn::convolution_backward_data(
conv_bwd_data_pd, diff_dst_memory, weights_memory, diff_src_memory);
auto conv_bwd_data_prim =
conv_bwd_data(conv_bwd_data_pd, diff_dst_memory_4data, weights_memory,
diff_src_memory);
// push primitive to stream and wait until it's executed
std::vector<mkldnn::primitive> pipeline{conv_bwd_data_prim};
mkldnn::stream(mkldnn::stream::kind::eager).submit(pipeline).wait();
// push primitive and execute it
std::vector<primitive> pipeline;
if (is_weights_reordered) pipeline.push_back(reorder_weights);
if (is_diff_dst_reordered_4data)
pipeline.push_back(reorder_diff_dst_4data);
pipeline.push_back(conv_bwd_data_prim);
stream(stream::kind::eager).submit(pipeline).wait();
input_grad->set_layout(DataLayout::kMKLDNN);
input_grad->set_format(GetMKLDNNFormat(diff_src_memory));
}
} // Compute()
private:
mkldnn::convolution_backward_weights::primitive_desc
ConvBwdWeightsPrimitiveDesc(
const mkldnn::memory::desc& src, const mkldnn::memory::desc& diff_weights,
const mkldnn::memory::desc& diff_dst, const std::vector<int>& strides,
const std::vector<int>& paddings,
const mkldnn::convolution_forward::primitive_desc& conv_pd,
const mkldnn::engine& engine) const {
auto conv_bwd_weights_desc = mkldnn::convolution_backward_weights::desc(
mkldnn::convolution_direct, src, diff_weights, diff_dst, strides,
paddings, paddings, mkldnn::padding_kind::zero);
return mkldnn::convolution_backward_weights::primitive_desc(
conv_bwd_weights_desc, engine, conv_pd);
}
mkldnn::convolution_backward_data::primitive_desc ConvBwdDataPrimitiveDesc(
const mkldnn::memory::desc& diff_src, const mkldnn::memory::desc& weights,
const mkldnn::memory::desc& diff_dst, const std::vector<int>& strides,
const std::vector<int>& paddings,
const mkldnn::convolution_forward::primitive_desc& conv_pd,
const mkldnn::engine& engine) const {
auto conv_bwd_data_desc = mkldnn::convolution_backward_data::desc(
mkldnn::convolution_direct, diff_src, weights, diff_dst, strides,
paddings, paddings, mkldnn::padding_kind::zero);
return mkldnn::convolution_backward_data::primitive_desc(conv_bwd_data_desc,
engine, conv_pd);
}
};
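The forward and backward kernels communicate through the device context's blob map: the forward pass stores its primitive descriptor under the key `"<Output name>@conv_pd"`, and the backward pass fetches it both to recover the descriptor and to pass it as the hint when constructing the backward primitive descriptors. Below is a minimal sketch of that caching pattern; it assumes nothing about `MKLDNNDeviceContext`'s real internals beyond the `SetBlob`/`GetBlob` calls visible above.

```cpp
#include <memory>
#include <string>
#include <unordered_map>

// Stand-in for the blob storage inside MKLDNNDeviceContext; the real class
// also handles thread safety and per-device state, omitted here.
class BlobCache {
 public:
  void SetBlob(const std::string& name, std::shared_ptr<void> data) {
    blobs_[name] = std::move(data);
  }
  std::shared_ptr<void> GetBlob(const std::string& name) const {
    auto it = blobs_.find(name);
    return it == blobs_.end() ? nullptr : it->second;
  }

 private:
  std::unordered_map<std::string, std::shared_ptr<void>> blobs_;
};

// Forward pass:   cache.SetBlob(key + "@conv_pd", conv_pd);
// Backward pass:  auto conv_pd = std::static_pointer_cast<
//                     conv_fwd::primitive_desc>(cache.GetBlob(key + "@conv_pd"));
// The type-erased shared_ptr<void> is why the backward kernel must
// static_pointer_cast the result and then PADDLE_ENFORCE it is non-null.
```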
} // namespace operators
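Downstream of these kernels, the MKLDNN layout travels with the tensor: the producer stamps `set_layout(kMKLDNN)` plus the concrete format the primitive chose (read back via `GetMKLDNNFormat`), and the consumer validates both before wrapping the raw buffer in a user memory built from `input->format()`. A toy sketch of that hand-off protocol follows; `LayoutTensor` is a hypothetical stand-in for `framework::Tensor`, not Paddle code.

```cpp
#include <cassert>

enum class DataLayout { kNHWC, kNCHW, kMKLDNN };
const int kFormatUndef = 0;  // plays the role of memory::format::format_undef

// Hypothetical stand-in for framework::Tensor's layout/format metadata.
struct LayoutTensor {
  DataLayout layout = DataLayout::kNCHW;
  int format = kFormatUndef;  // an mkldnn::memory::format value in real code
};

// Producer side (end of Compute() above): record the layout the primitive
// actually picked so the next op can interpret the raw buffer correctly.
void StampMKLDNNOutput(LayoutTensor* out, int chosen_format) {
  out->layout = DataLayout::kMKLDNN;
  out->format = chosen_format;
}

// Consumer side: mirrors the PADDLE_ENFORCE checks at the top of Compute().
bool IsUsableMKLDNNInput(const LayoutTensor& in) {
  return in.layout == DataLayout::kMKLDNN && in.format != kFormatUndef;
}

int main() {
  LayoutTensor t;
  assert(!IsUsableMKLDNNInput(t));       // fresh tensor: no MKLDNN metadata
  StampMKLDNNOutput(&t, /*format=*/8);   // 8 is an arbitrary placeholder
  assert(IsUsableMKLDNNInput(t));
  return 0;
}
```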
......
@@ -75,9 +75,8 @@ void ConvOp::InferShape(framework::InferShapeContext* ctx) const {
framework::OpKernelType ConvOp::GetExpectedKernelType(
const framework::ExecutionContext& ctx) const {
framework::LibraryType library{framework::LibraryType::kPlain};
std::string data_format = ctx.Attr<std::string>("data_format");
// TODO(pzelazko-intel): enable MKLDNN layout when it's ready
std::string data_format = ctx.Attr<std::string>("data_format");
framework::DataLayout layout = framework::StringToDataLayout(data_format);
#ifdef PADDLE_WITH_CUDA
......
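The conv_op.cc hunk is cut off above; presumably the function goes on to select the MKLDNN kernel now that the MKLDNN layout is supported (the removed TODO said exactly that). A hedged sketch of what the full function plausibly looks like, modeled on how Paddle selects cuDNN elsewhere; `CanMKLDNNBeUsed`/`CanCUDNNBeUsed` and the exact lines are assumptions, not the verbatim diff, and the snippet is not compilable outside the Paddle tree.

```cpp
// Sketch only: GetExpectedKernelType with an MKLDNN branch, under the
// assumptions stated above.
framework::OpKernelType ConvOp::GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const {
  framework::LibraryType library{framework::LibraryType::kPlain};
  std::string data_format = ctx.Attr<std::string>("data_format");
  framework::DataLayout layout = framework::StringToDataLayout(data_format);

#ifdef PADDLE_WITH_CUDA
  if (platform::CanCUDNNBeUsed(ctx)) {
    library = framework::LibraryType::kCUDNN;
  }
#endif
#ifdef PADDLE_WITH_MKLDNN
  if (library == framework::LibraryType::kPlain &&
      platform::CanMKLDNNBeUsed(ctx)) {
    library = framework::LibraryType::kMKLDNN;
    // The point of this commit: advertise the kMKLDNN layout instead of the
    // plain one, so inputs arrive with MKLDNN layout metadata set.
    layout = framework::DataLayout::kMKLDNN;
  }
#endif

  return framework::OpKernelType(
      framework::ToDataType(ctx.Input<Tensor>("Input")->type()),
      ctx.GetPlace(), layout, library);
}
```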