Unverified commit 7cb4a8b8 authored by lidanqing, committed by GitHub

[oneDNN] Conv dilation support (#27914)

* conv dilated mkldnn support: forward and backward pass

* add mkldnn conv_transpose dilation UT
test=develop

* remove unnecessary PADDLE_ENFORCE

* add int8 and bf16 dilated conv UT

* update according to reviews
Parent 64c26349
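Note on the recurring `std::transform(..., [](int64_t i) { return i - 1; })` pattern in the hunks below: oneDNN counts a dense kernel as dilation 0, while Paddle (like most frameworks) uses 1 for "no dilation", so every dilation value is decremented by one before being handed to a oneDNN descriptor. A minimal standalone sketch of that conversion, with an illustrative helper name that is not part of the patch:

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    // Convert framework-style dilations (1 == no dilation) to
    // oneDNN-style dilations (0 == no dilation).
    std::vector<int64_t> ToOneDNNDilations(std::vector<int64_t> dilations) {
      std::transform(dilations.begin(), dilations.end(), dilations.begin(),
                     [](int64_t i) { return i - 1; });
      return dilations;  // e.g. {2, 2} -> {1, 1}, {1, 1} -> {0, 0}
    }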
@@ -84,19 +84,6 @@ void ConvBiasFusePass::ApplyImpl(ir::Graph* graph) const {
       VLOG(3) << "do not perform " + type() + "+bias fuse";
       return;
     }
-    if (conv->Op()->HasAttr("dilations")) {
-      auto dilations =
-          BOOST_GET_CONST(std::vector<int>, conv->Op()->GetAttr("dilations"));
-      for (const auto& d : dilations) {
-        if (d != 1) {
-          LOG(WARNING)
-              << "dilation conv not supported in MKLDNN, fuse not apply "
-              << "and set conv attribute use_mkldnn = false";
-          conv->Op()->SetAttr("use_mkldnn", false);
-          return;
-        }
-      }
-    }
     auto* eltwise_bias_tensor =
         scope->FindVar(eltwise_bias->Name())->GetMutable<LoDTensor>();
......
@@ -193,13 +193,8 @@ class ConvMKLDNNHandlerT
                                data_dims, strides, ksize);
       const bool is_conv3d = strides.size() == 3U;
-      PADDLE_ENFORCE_EQ(
-          is_conv3d
-              ? dilations.size() == 3 && dilations[0] == 1 &&
-                    dilations[1] == 1 && dilations[2] == 1
-              : dilations.size() == 2 && dilations[0] == 1 && dilations[1] == 1,
-          true, platform::errors::Unimplemented(
-                    "Dilation in oneDNN convolution is not implemented yet"));
+      std::transform(dilations.begin(), dilations.end(), dilations.begin(),
+                     [](int64_t i) { return i - 1; });
       const auto src_tz = paddle::framework::vectorize(input->dims());
@@ -210,6 +205,7 @@ class ConvMKLDNNHandlerT
       const mkldnn::memory::dims stride_dims = strides;
       const auto mkldnn_paddings = platform::ToMkldnnPadding(paddings);
+      const mkldnn::memory::dims dilations_dims = dilations;
       /* create memory descriptor for convolution without specified format
        * ('any') which lets a primitive (convolution in this case) choose
@@ -256,13 +252,13 @@ class ConvMKLDNNHandlerT
         this->AcquireForwardPrimitiveDescriptor(
             conv_attr, fwd_prop_kind, dnnl::algorithm::convolution_direct,
-            src_md, weights_md, bias_md, dst_md, stride_dims,
+            src_md, weights_md, bias_md, dst_md, stride_dims, dilations_dims,
             mkldnn_paddings[0], mkldnn_paddings[1]);
       } else {
         this->AcquireForwardPrimitiveDescriptor(
             conv_attr, fwd_prop_kind, dnnl::algorithm::convolution_direct,
-            src_md, weights_md, dst_md, stride_dims, mkldnn_paddings[0],
-            mkldnn_paddings[1]);
+            src_md, weights_md, dst_md, stride_dims, dilations_dims,
+            mkldnn_paddings[0], mkldnn_paddings[1]);
       }
     }
   }
@@ -619,9 +615,8 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     bool is_conv3d = strides.size() == 3U;
     PADDLE_ENFORCE_NE(is_conv3d, true,
-                      platform::errors::InvalidArgument(
-                          "int8 does not support conv3d currently, should "
-                          "set param is_conv3d as False"));
+                      platform::errors::Unimplemented(
+                          "int8 does not support conv3d currently"));
     auto input_dims = input->dims();
     auto data_dims = framework::slice_ddim(input_dims, 2, input_dims.size());
@@ -641,13 +636,8 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     GetWeightsTz(weights_tz, g);
     auto dst_tz = paddle::framework::vectorize(output->dims());
-    PADDLE_ENFORCE_EQ(
-        is_conv3d
-            ? dilations.size() == 3 && dilations[0] == 1 &&
-                  dilations[1] == 1 && dilations[2] == 1
-            : dilations.size() == 2 && dilations[0] == 1 && dilations[1] == 1,
-        true, platform::errors::Unimplemented(
-                  "dilation in convolution is not implemented yet"));
+    std::transform(dilations.begin(), dilations.end(), dilations.begin(),
+                   [](int64_t i) { return i - 1; });
     const K* filter_data = filter->data<K>();
     auto scale_in_data = ctx.Attr<float>("Scale_in");
@@ -710,13 +700,13 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
       auto bias_md = platform::MKLDNNMemDesc(bias_tz, memory::data_type::s32,
                                              MKLDNNMemoryFormat::x);
       conv_pd = handler->AcquireConvolutionPrimitiveDescriptor(
-          src_md, weights_md, bias_md, dst_md, strides, paddings,
+          src_md, weights_md, bias_md, dst_md, strides, dilations, paddings,
           mkldnn_engine, fuse_activation, fuse_alpha, fuse_beta,
           fuse_residual_conn, propagation, output_shift_scale, sum_scale);
     } else {
       conv_pd = handler->AcquireConvolutionPrimitiveDescriptor(
-          src_md, weights_md, boost::none, dst_md, strides, paddings,
-          mkldnn_engine, fuse_activation, fuse_alpha, fuse_beta,
+          src_md, weights_md, boost::none, dst_md, strides, dilations,
+          paddings, mkldnn_engine, fuse_activation, fuse_alpha, fuse_beta,
           fuse_residual_conn, propagation, output_shift_scale, sum_scale);
     }
@@ -1019,11 +1009,14 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
             "Fail to find conv_pd in device context"));
     auto mkldnn_paddings = platform::ToMkldnnPadding(paddings);
+    std::transform(dilations.begin(), dilations.end(), dilations.begin(),
+                   [](int64_t i) { return i - 1; });
+    const mkldnn::memory::dims dilations_dims = dilations;
     // create backward convolution weights primitive descriptor
     auto conv_bwd_weights_desc = mkldnn::convolution_backward_weights::desc(
         mkldnn::algorithm::convolution_direct, src_md, diff_weights_md,
-        diff_dst_md, strides, mkldnn_paddings[0], mkldnn_paddings[1]);
+        diff_dst_md, strides, dilations_dims, mkldnn_paddings[0],
+        mkldnn_paddings[1]);
     auto conv_bwd_weights_pd =
         std::make_shared<mkldnn::convolution_backward_weights::primitive_desc>(
@@ -1032,7 +1025,8 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
     // create backward convolution data primitive descriptor
     auto conv_bwd_data_desc = mkldnn::convolution_backward_data::desc(
         mkldnn::algorithm::convolution_direct, diff_src_md, weights_md,
-        diff_dst_md, strides, mkldnn_paddings[0], mkldnn_paddings[1]);
+        diff_dst_md, strides, dilations_dims, mkldnn_paddings[0],
+        mkldnn_paddings[1]);
     auto conv_bwd_data_pd =
         std::make_shared<mkldnn::convolution_backward_data::primitive_desc>(
......
@@ -104,6 +104,11 @@ class ConvTransposeMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     int groups = ctx.Attr<int>("groups");
     std::string padding_algorithm = ctx.Attr<std::string>("padding_algorithm");
+    PADDLE_ENFORCE_EQ(
+        strides.size(), 2,
+        platform::errors::Unimplemented(
+            "Now we only support 2d oneDNN convolution transpose op"));
     auto input_dims = input->dims();
     auto data_dims = framework::slice_ddim(input_dims, 2, input_dims.size());
     auto filter_dims = filter->dims();
@@ -115,10 +120,8 @@ class ConvTransposeMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     UpdatePaddingAndDilation(&paddings, &dilations, padding_algorithm,
                              data_dims, strides, ksize);
-    PADDLE_ENFORCE(
-        dilations.size() == 2 && dilations[0] == 1 && dilations[1] == 1,
-        platform::errors::Unimplemented(
-            "dilation in convolution is not implemented yet"));
+    std::transform(dilations.begin(), dilations.end(), dilations.begin(),
+                   [](int64_t i) { return i - 1; });
     const T* input_data = input->data<T>();
     const T* filter_data = filter->data<T>();
@@ -210,11 +213,12 @@ class ConvTransposeMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
       auto bias_md = platform::MKLDNNMemDesc(
           bias_tz, platform::MKLDNNGetDataType<T>(), MKLDNNMemoryFormat::x);
       conv_transpose_pd = handler.AcquireConvolutionPrimitiveDescriptor(
-          src_md, weights_md, bias_md, dst_md, strides, paddings, mkldnn_engine,
-          fuse_activation, fuse_alpha, fuse_beta, false, fwd_prop_kind);
+          src_md, weights_md, bias_md, dst_md, strides, dilations, paddings,
+          mkldnn_engine, fuse_activation, fuse_alpha, fuse_beta, false,
+          fwd_prop_kind);
     } else {
       conv_transpose_pd = handler.AcquireConvolutionPrimitiveDescriptor(
-          src_md, weights_md, boost::none, dst_md, strides, paddings,
+          src_md, weights_md, boost::none, dst_md, strides, dilations, paddings,
           mkldnn_engine, fuse_activation, fuse_alpha, fuse_beta, false,
           fwd_prop_kind);
     }
......
@@ -1330,6 +1330,7 @@ class ConvMKLDNNTemplateHandler : public MKLDNNHandler {
       const mkldnn::memory::desc& src, const mkldnn::memory::desc& weights,
       boost::optional<const mkldnn::memory::desc&> bias,
       const mkldnn::memory::desc& dst, const std::vector<int64_t>& strides,
+      const std::vector<int64_t>& dilations,
      const std::vector<int64_t>& paddings, const mkldnn::engine& engine,
       const std::string& fuse_activation, float fuse_alpha, float fuse_beta,
       const bool fuse_residual_conn, mkldnn::prop_kind fwd_prop_kind,
@@ -1352,18 +1353,18 @@ class ConvMKLDNNTemplateHandler : public MKLDNNHandler {
         dev_ctx_.GetBlob(key_conv_pd));
     if (conv_pd_ == nullptr) {
       mkldnn::memory::dims stride_dims = strides;
+      mkldnn::memory::dims dilations_dims = dilations;
       auto mkldnn_paddings = ToMkldnnPadding(paddings);
       auto conv_desc =
           bias ? typename forward_t::desc(
                      fwd_prop_kind, convolutional_algorithm<forward_t>::T,
-                     src, weights, *bias, dst, stride_dims,
+                     src, weights, *bias, dst, stride_dims, dilations_dims,
                      mkldnn_paddings[0], mkldnn_paddings[1])
               : typename forward_t::desc(
                      fwd_prop_kind, convolutional_algorithm<forward_t>::T,
-                     src, weights, dst, stride_dims, mkldnn_paddings[0],
-                     mkldnn_paddings[1]);
+                     src, weights, dst, stride_dims, dilations_dims,
+                     mkldnn_paddings[0], mkldnn_paddings[1]);
       mkldnn::primitive_attr conv_attr =
           CreatePostOps(fuse_activation, fuse_alpha, fuse_beta,
......
@@ -23,12 +23,6 @@ from paddle.fluid.tests.unittests.op_test import OpTest, convert_float_to_uint16
 from paddle.fluid.tests.unittests.test_conv2d_op import conv2d_forward_naive, TestConv2dOp
-def conv2d_forward_refer(input, filter, group, conv_param):
-    out, in_n, out_h, out_w, out_c = conv2d_forward_naive(input, filter, group,
-                                                          conv_param)
-    return out
 def conv2d_residual_naive(out, residual):
     assert out.shape == residual.shape
     out = np.add(out, residual)
@@ -176,6 +170,21 @@ class TestWithStride(TestConv2dBf16Op):
         self.input_type = np.uint16
+class TestWithDilations(TestConv2dBf16Op):
+    def init_test_case(self):
+        self.pad = [1, 1]
+        self.stride = [1, 1]
+        self.dilations = [2, 2]
+        self.input_size = [2, 3, 10, 10]
+        self.input_residual_size = [2, 6, 8, 8]
+        assert np.mod(self.input_size[1], self.groups) == 0
+        f_c = self.input_size[1] // self.groups
+        self.filter_size = [6, f_c, 3, 3]
+    def init_data_type(self):
+        self.input_type = np.uint16
 class TestWith1x1ForceFP32Output(TestConv2dBf16Op):
     def init_test_case(self):
         self.pad = [0, 0]
......
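A quick sanity check on the new dilated test cases: with the standard conv output-size formula out = (H + 2*pad - dilation*(k - 1) - 1) / stride + 1, the TestWithDilations settings above (input 10x10, pad 1, stride 1, dilation 2, kernel 3) give (10 + 2*1 - 2*(3 - 1) - 1) / 1 + 1 = 8, matching the 8x8 spatial size of input_residual_size = [2, 6, 8, 8]. The same shapes are reused in the int8 variant below.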
@@ -228,6 +228,22 @@ class TestWithStride(TestConv2dInt8Op):
         self.scale_in_eltwise = 0.5
+class TestWithDilations(TestConv2dInt8Op):
+    def init_test_case(self):
+        self.pad = [1, 1]
+        self.stride = [1, 1]
+        self.dilations = [2, 2]
+        self.input_size = [2, 3, 10, 10]
+        self.input_residual_size = [2, 6, 8, 8]
+        assert np.mod(self.input_size[1], self.groups) == 0
+        f_c = self.input_size[1] // self.groups
+        self.filter_size = [6, f_c, 3, 3]
+        self.scale_in = 1.0
+        self.scale_out = 0.8
+        self.scale_weights = [10.0]
+        self.scale_in_eltwise = 0.5
 class TestWith1x1(TestConv2dInt8Op):
     def init_test_case(self):
         self.pad = [0, 0]
@@ -343,6 +359,7 @@ def create_test_int8_class(parent):
 create_test_int8_class(TestConv2dInt8Op)
 create_test_int8_class(TestWithPad)
 create_test_int8_class(TestWithStride)
+create_test_int8_class(TestWithDilations)
 create_test_int8_class(TestWithGroup)
 create_test_int8_class(TestWith1x1)
 create_test_int8_class(TestWithInput1x1Filter1x1)
......
@@ -215,5 +215,22 @@ class TestConv2dOp_AsyPadding_NHWC_MKLDNN(TestConv2dOp_Valid_NHWC_MKLDNN):
         self.padding_algorithm = "EXPLICIT"
+class TestMKLDNNDilations(TestConv2dMKLDNNOp):
+    def init_test_case(self):
+        TestConv2dMKLDNNOp.init_test_case(self)
+        self.pad = [0, 0]
+        self.stride = [1, 1]
+        self.input_size = [2, 3, 10, 10]  # NCHW
+        assert np.mod(self.input_size[1], self.groups) == 0
+        f_c = self.input_size[1] // self.groups
+        self.filter_size = [12, f_c, 3, 3]
+    def init_dilation(self):
+        self.dilations = [2, 2]
+    def init_group(self):
+        self.groups = 3
 if __name__ == '__main__':
     unittest.main()
@@ -136,3 +136,17 @@ class TestMKLDNNWithValidPad_NHWC(TestMKLDNNWithValidPad):
         self.data_format = "NHWC"
         N, C, H, W = self.input_size
         self.input_size = [N, H, W, C]
+class TestConv2dTransposeMKLDNNWithDilationsExplicitPad(
+        TestConv2dTransposeMKLDNNOp):
+    def init_test_case(self):
+        TestConv2dTransposeMKLDNNOp.init_test_case(self)
+        self.stride = [2, 1]
+        self.dilations = [1, 2]
+        self.groups = 1
+        self.input_size = [4, 3, 8, 7]  # NCHW
+        f_c = self.input_size[1]
+        self.filter_size = [f_c, 6, 4, 3]
+        self.pad = [1, 3, 2, 1]
+        self.padding_algorithm = "EXPLICIT"
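A quick shape check for the dilated transpose case above, assuming Paddle's [top, bottom, left, right] ordering for 4-element explicit paddings and zero output padding: with out = (in - 1)*stride - pad_sum + dilation*(k - 1) + 1, the expected output is height (8 - 1)*2 - (1 + 3) + 1*(4 - 1) + 1 = 14 and width (7 - 1)*1 - (2 + 1) + 2*(3 - 1) + 1 = 8.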