Unverified commit 7cb4a8b8, authored by lidanqing, committed by GitHub

[oneDNN] Conv dilation support (#27914)

* conv dilated mkldnn support: forward and backward pass

* add mkldnn conv_transpose dilation UT
test=develop

* remove unnecessary PADDLE_ENFORCE

* add int8 and bf16 dilated conv UT

* update according to reviews
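
Note: the repeated std::transform(..., [](int64_t i) { return i - 1; }) calls in this change convert Paddle's dilation convention (1 = no dilation) to oneDNN's convention (0 = no dilation). A minimal standalone sketch of that conversion, illustrative only and not part of the patch:

// Sketch only: convert framework-style dilations (1 == no dilation) to
// oneDNN-style dilations (0 == no dilation), as done throughout this patch.
#include <algorithm>
#include <cstdint>
#include <vector>

int main() {
  std::vector<int64_t> dilations = {2, 2};  // framework convention
  std::transform(dilations.begin(), dilations.end(), dilations.begin(),
                 [](int64_t i) { return i - 1; });
  // dilations is now {1, 1}; these adjusted values are what the oneDNN
  // convolution descriptors receive alongside strides and paddings.
  return 0;
}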
Parent 64c26349
@@ -84,19 +84,6 @@ void ConvBiasFusePass::ApplyImpl(ir::Graph* graph) const {
       VLOG(3) << "do not perform " + type() + "+bias fuse";
       return;
     }
-    if (conv->Op()->HasAttr("dilations")) {
-      auto dilations =
-          BOOST_GET_CONST(std::vector<int>, conv->Op()->GetAttr("dilations"));
-      for (const auto& d : dilations) {
-        if (d != 1) {
-          LOG(WARNING)
-              << "dilation conv not supported in MKLDNN, fuse not apply "
-              << "and set conv attribute use_mkldnn = false";
-          conv->Op()->SetAttr("use_mkldnn", false);
-          return;
-        }
-      }
-    }
     auto* eltwise_bias_tensor =
         scope->FindVar(eltwise_bias->Name())->GetMutable<LoDTensor>();
...
@@ -193,13 +193,8 @@ class ConvMKLDNNHandlerT
                                data_dims, strides, ksize);
       const bool is_conv3d = strides.size() == 3U;
-      PADDLE_ENFORCE_EQ(
-          is_conv3d
-              ? dilations.size() == 3 && dilations[0] == 1 &&
-                    dilations[1] == 1 && dilations[2] == 1
-              : dilations.size() == 2 && dilations[0] == 1 && dilations[1] == 1,
-          true, platform::errors::Unimplemented(
-                    "Dilation in oneDNN convolution is not implemented yet"));
+      std::transform(dilations.begin(), dilations.end(), dilations.begin(),
+                     [](int64_t i) { return i - 1; });
       const auto src_tz = paddle::framework::vectorize(input->dims());
@@ -210,6 +205,7 @@ class ConvMKLDNNHandlerT
       const mkldnn::memory::dims stride_dims = strides;
       const auto mkldnn_paddings = platform::ToMkldnnPadding(paddings);
+      const mkldnn::memory::dims dilations_dims = dilations;
       /* create memory descriptor for convolution without specified format
        * ('any') which lets a primitive (convolution in this case) choose
@@ -256,13 +252,13 @@ class ConvMKLDNNHandlerT
         this->AcquireForwardPrimitiveDescriptor(
             conv_attr, fwd_prop_kind, dnnl::algorithm::convolution_direct,
-            src_md, weights_md, bias_md, dst_md, stride_dims,
+            src_md, weights_md, bias_md, dst_md, stride_dims, dilations_dims,
             mkldnn_paddings[0], mkldnn_paddings[1]);
       } else {
         this->AcquireForwardPrimitiveDescriptor(
             conv_attr, fwd_prop_kind, dnnl::algorithm::convolution_direct,
-            src_md, weights_md, dst_md, stride_dims, mkldnn_paddings[0],
-            mkldnn_paddings[1]);
+            src_md, weights_md, dst_md, stride_dims, dilations_dims,
+            mkldnn_paddings[0], mkldnn_paddings[1]);
       }
     }
   }
@@ -619,9 +615,8 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
       bool is_conv3d = strides.size() == 3U;
       PADDLE_ENFORCE_NE(is_conv3d, true,
-                        platform::errors::InvalidArgument(
-                            "int8 does not support conv3d currently, should "
-                            "set param is_conv3d as False"));
+                        platform::errors::Unimplemented(
+                            "int8 does not support conv3d currently"));
       auto input_dims = input->dims();
       auto data_dims = framework::slice_ddim(input_dims, 2, input_dims.size());
@@ -641,13 +636,8 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
       GetWeightsTz(weights_tz, g);
       auto dst_tz = paddle::framework::vectorize(output->dims());
-      PADDLE_ENFORCE_EQ(
-          is_conv3d
-              ? dilations.size() == 3 && dilations[0] == 1 &&
-                    dilations[1] == 1 && dilations[2] == 1
-              : dilations.size() == 2 && dilations[0] == 1 && dilations[1] == 1,
-          true, platform::errors::Unimplemented(
-                    "dilation in convolution is not implemented yet"));
+      std::transform(dilations.begin(), dilations.end(), dilations.begin(),
+                     [](int64_t i) { return i - 1; });
       const K* filter_data = filter->data<K>();
       auto scale_in_data = ctx.Attr<float>("Scale_in");
@@ -710,13 +700,13 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
         auto bias_md = platform::MKLDNNMemDesc(bias_tz, memory::data_type::s32,
                                                MKLDNNMemoryFormat::x);
         conv_pd = handler->AcquireConvolutionPrimitiveDescriptor(
-            src_md, weights_md, bias_md, dst_md, strides, paddings,
+            src_md, weights_md, bias_md, dst_md, strides, dilations, paddings,
             mkldnn_engine, fuse_activation, fuse_alpha, fuse_beta,
             fuse_residual_conn, propagation, output_shift_scale, sum_scale);
       } else {
         conv_pd = handler->AcquireConvolutionPrimitiveDescriptor(
-            src_md, weights_md, boost::none, dst_md, strides, paddings,
-            mkldnn_engine, fuse_activation, fuse_alpha, fuse_beta,
+            src_md, weights_md, boost::none, dst_md, strides, dilations,
+            paddings, mkldnn_engine, fuse_activation, fuse_alpha, fuse_beta,
             fuse_residual_conn, propagation, output_shift_scale, sum_scale);
       }
@@ -1019,11 +1009,14 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
                             "Fail to find conv_pd in device context"));
       auto mkldnn_paddings = platform::ToMkldnnPadding(paddings);
+      std::transform(dilations.begin(), dilations.end(), dilations.begin(),
+                     [](int64_t i) { return i - 1; });
+      const mkldnn::memory::dims dilations_dims = dilations;
       // create backward convolution weights primitive descriptor
       auto conv_bwd_weights_desc = mkldnn::convolution_backward_weights::desc(
           mkldnn::algorithm::convolution_direct, src_md, diff_weights_md,
-          diff_dst_md, strides, mkldnn_paddings[0], mkldnn_paddings[1]);
+          diff_dst_md, strides, dilations_dims, mkldnn_paddings[0],
+          mkldnn_paddings[1]);
       auto conv_bwd_weights_pd =
           std::make_shared<mkldnn::convolution_backward_weights::primitive_desc>(
@@ -1032,7 +1025,8 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
       // create backward convolution data primitive descriptor
       auto conv_bwd_data_desc = mkldnn::convolution_backward_data::desc(
           mkldnn::algorithm::convolution_direct, diff_src_md, weights_md,
-          diff_dst_md, strides, mkldnn_paddings[0], mkldnn_paddings[1]);
+          diff_dst_md, strides, dilations_dims, mkldnn_paddings[0],
+          mkldnn_paddings[1]);
       auto conv_bwd_data_pd =
           std::make_shared<mkldnn::convolution_backward_data::primitive_desc>(
...
@@ -104,6 +104,11 @@ class ConvTransposeMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     int groups = ctx.Attr<int>("groups");
     std::string padding_algorithm = ctx.Attr<std::string>("padding_algorithm");
+    PADDLE_ENFORCE_EQ(
+        strides.size(), 2,
+        platform::errors::Unimplemented(
+            "Now we only support 2d oneDNN convolution transpose op"));
+
     auto input_dims = input->dims();
     auto data_dims = framework::slice_ddim(input_dims, 2, input_dims.size());
     auto filter_dims = filter->dims();
@@ -115,10 +120,8 @@ class ConvTransposeMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     UpdatePaddingAndDilation(&paddings, &dilations, padding_algorithm,
                              data_dims, strides, ksize);
-    PADDLE_ENFORCE(
-        dilations.size() == 2 && dilations[0] == 1 && dilations[1] == 1,
-        platform::errors::Unimplemented(
-            "dilation in convolution is not implemented yet"));
+    std::transform(dilations.begin(), dilations.end(), dilations.begin(),
+                   [](int64_t i) { return i - 1; });
     const T* input_data = input->data<T>();
     const T* filter_data = filter->data<T>();
@@ -210,11 +213,12 @@ class ConvTransposeMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
       auto bias_md = platform::MKLDNNMemDesc(
           bias_tz, platform::MKLDNNGetDataType<T>(), MKLDNNMemoryFormat::x);
       conv_transpose_pd = handler.AcquireConvolutionPrimitiveDescriptor(
-          src_md, weights_md, bias_md, dst_md, strides, paddings, mkldnn_engine,
-          fuse_activation, fuse_alpha, fuse_beta, false, fwd_prop_kind);
+          src_md, weights_md, bias_md, dst_md, strides, dilations, paddings,
+          mkldnn_engine, fuse_activation, fuse_alpha, fuse_beta, false,
+          fwd_prop_kind);
     } else {
       conv_transpose_pd = handler.AcquireConvolutionPrimitiveDescriptor(
-          src_md, weights_md, boost::none, dst_md, strides, paddings,
+          src_md, weights_md, boost::none, dst_md, strides, dilations, paddings,
           mkldnn_engine, fuse_activation, fuse_alpha, fuse_beta, false,
           fwd_prop_kind);
     }
...
@@ -1330,6 +1330,7 @@ class ConvMKLDNNTemplateHandler : public MKLDNNHandler {
       const mkldnn::memory::desc& src, const mkldnn::memory::desc& weights,
       boost::optional<const mkldnn::memory::desc&> bias,
       const mkldnn::memory::desc& dst, const std::vector<int64_t>& strides,
+      const std::vector<int64_t>& dilations,
       const std::vector<int64_t>& paddings, const mkldnn::engine& engine,
       const std::string& fuse_activation, float fuse_alpha, float fuse_beta,
       const bool fuse_residual_conn, mkldnn::prop_kind fwd_prop_kind,
@@ -1352,18 +1353,18 @@ class ConvMKLDNNTemplateHandler : public MKLDNNHandler {
         dev_ctx_.GetBlob(key_conv_pd));
     if (conv_pd_ == nullptr) {
       mkldnn::memory::dims stride_dims = strides;
+      mkldnn::memory::dims dilations_dims = dilations;
       auto mkldnn_paddings = ToMkldnnPadding(paddings);
       auto conv_desc =
           bias ? typename forward_t::desc(
                      fwd_prop_kind, convolutional_algorithm<forward_t>::T,
-                     src, weights, *bias, dst, stride_dims,
+                     src, weights, *bias, dst, stride_dims, dilations_dims,
                      mkldnn_paddings[0], mkldnn_paddings[1])
               : typename forward_t::desc(
                      fwd_prop_kind, convolutional_algorithm<forward_t>::T,
-                     src, weights, dst, stride_dims, mkldnn_paddings[0],
-                     mkldnn_paddings[1]);
+                     src, weights, dst, stride_dims, dilations_dims,
+                     mkldnn_paddings[0], mkldnn_paddings[1]);
       mkldnn::primitive_attr conv_attr =
           CreatePostOps(fuse_activation, fuse_alpha, fuse_beta,
...
@@ -23,12 +23,6 @@ from paddle.fluid.tests.unittests.op_test import OpTest, convert_float_to_uint16
 from paddle.fluid.tests.unittests.test_conv2d_op import conv2d_forward_naive, TestConv2dOp
 
-def conv2d_forward_refer(input, filter, group, conv_param):
-    out, in_n, out_h, out_w, out_c = conv2d_forward_naive(input, filter, group,
-                                                          conv_param)
-    return out
 
 def conv2d_residual_naive(out, residual):
     assert out.shape == residual.shape
     out = np.add(out, residual)
@@ -176,6 +170,21 @@ class TestWithStride(TestConv2dBf16Op):
         self.input_type = np.uint16
 
+class TestWithDilations(TestConv2dBf16Op):
+    def init_test_case(self):
+        self.pad = [1, 1]
+        self.stride = [1, 1]
+        self.dilations = [2, 2]
+        self.input_size = [2, 3, 10, 10]
+        self.input_residual_size = [2, 6, 8, 8]
+        assert np.mod(self.input_size[1], self.groups) == 0
+        f_c = self.input_size[1] // self.groups
+        self.filter_size = [6, f_c, 3, 3]
+
+    def init_data_type(self):
+        self.input_type = np.uint16
+
 
 class TestWith1x1ForceFP32Output(TestConv2dBf16Op):
     def init_test_case(self):
         self.pad = [0, 0]
...
@@ -228,6 +228,22 @@ class TestWithStride(TestConv2dInt8Op):
         self.scale_in_eltwise = 0.5
 
+class TestWithDilations(TestConv2dInt8Op):
+    def init_test_case(self):
+        self.pad = [1, 1]
+        self.stride = [1, 1]
+        self.dilations = [2, 2]
+        self.input_size = [2, 3, 10, 10]
+        self.input_residual_size = [2, 6, 8, 8]
+        assert np.mod(self.input_size[1], self.groups) == 0
+        f_c = self.input_size[1] // self.groups
+        self.filter_size = [6, f_c, 3, 3]
+        self.scale_in = 1.0
+        self.scale_out = 0.8
+        self.scale_weights = [10.0]
+        self.scale_in_eltwise = 0.5
+
 
 class TestWith1x1(TestConv2dInt8Op):
     def init_test_case(self):
         self.pad = [0, 0]
@@ -343,6 +359,7 @@ def create_test_int8_class(parent):
 create_test_int8_class(TestConv2dInt8Op)
 create_test_int8_class(TestWithPad)
 create_test_int8_class(TestWithStride)
+create_test_int8_class(TestWithDilations)
 create_test_int8_class(TestWithGroup)
 create_test_int8_class(TestWith1x1)
 create_test_int8_class(TestWithInput1x1Filter1x1)
...
@@ -215,5 +215,22 @@ class TestConv2dOp_AsyPadding_NHWC_MKLDNN(TestConv2dOp_Valid_NHWC_MKLDNN):
         self.padding_algorithm = "EXPLICIT"
 
+class TestMKLDNNDilations(TestConv2dMKLDNNOp):
+    def init_test_case(self):
+        TestConv2dMKLDNNOp.init_test_case(self)
+        self.pad = [0, 0]
+        self.stride = [1, 1]
+        self.input_size = [2, 3, 10, 10]  # NCHW
+        assert np.mod(self.input_size[1], self.groups) == 0
+        f_c = self.input_size[1] // self.groups
+        self.filter_size = [12, f_c, 3, 3]
+
+    def init_dilation(self):
+        self.dilations = [2, 2]
+
+    def init_group(self):
+        self.groups = 3
+
 
 if __name__ == '__main__':
     unittest.main()
@@ -136,3 +136,17 @@ class TestMKLDNNWithValidPad_NHWC(TestMKLDNNWithValidPad):
         self.data_format = "NHWC"
         N, C, H, W = self.input_size
         self.input_size = [N, H, W, C]
+
+
+class TestConv2dTransposeMKLDNNWithDilationsExplicitPad(
+        TestConv2dTransposeMKLDNNOp):
+    def init_test_case(self):
+        TestConv2dTransposeMKLDNNOp.init_test_case(self)
+        self.stride = [2, 1]
+        self.dilations = [1, 2]
+        self.groups = 1
+        self.input_size = [4, 3, 8, 7]  # NCHW
+        f_c = self.input_size[1]
+        self.filter_size = [f_c, 6, 4, 3]
+        self.pad = [1, 3, 2, 1]
+        self.padding_algorithm = "EXPLICIT"