未验证 提交 a4c3e038 编写于 作者: J joanna.wozna.intel 提交者: GitHub

Correct conv2d int8 mkldnn UT (#36711)

* Refactor conv2d int8 unit test

* Correct according to review and add int8 check
上级 e11ecfce
...@@ -226,6 +226,23 @@ bool SupportsBfloat16FastPerformance() { ...@@ -226,6 +226,23 @@ bool SupportsBfloat16FastPerformance() {
#endif #endif
} }
// Reports whether the current CPU qualifies for int8 computation.
//
// Built without PADDLE_WITH_MKLDNN this always answers false. Otherwise the
// answer is true when platform::MayIUse accepts either the avx2 or the
// avx512f instruction set; avx512f is only queried if avx2 is rejected.
bool SupportsInt8() {
#ifdef PADDLE_WITH_MKLDNN
  // AVX2 alone is sufficient, so answer immediately when it is available.
  if (platform::MayIUse(platform::cpu_isa_t::avx2)) {
    return true;
  }
  return platform::MayIUse(platform::cpu_isa_t::avx512f);
#else
  return false;
#endif
}
// Reports whether the current CPU accepts the avx512_core_vnni instruction
// set, as judged by platform::MayIUse.
//
// Built without PADDLE_WITH_MKLDNN this always answers false.
bool SupportsVNNI() {
#ifdef PADDLE_WITH_MKLDNN
  const bool has_vnni =
      platform::MayIUse(platform::cpu_isa_t::avx512_core_vnni);
  return has_vnni;
#else
  return false;
#endif
}
// According to the input `place` and `dtype`, this function returns a tuple // According to the input `place` and `dtype`, this function returns a tuple
// consists of three sets: // consists of three sets:
// 1) All operators registered in the Paddle framework. // 1) All operators registered in the Paddle framework.
...@@ -2121,6 +2138,8 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -2121,6 +2138,8 @@ All parameter, weight, gradient are variables in Paddle.
m.def("_is_compiled_with_heterps", IsCompiledWithHETERPS); m.def("_is_compiled_with_heterps", IsCompiledWithHETERPS);
m.def("supports_bfloat16", SupportsBfloat16); m.def("supports_bfloat16", SupportsBfloat16);
m.def("supports_bfloat16_fast_performance", SupportsBfloat16FastPerformance); m.def("supports_bfloat16_fast_performance", SupportsBfloat16FastPerformance);
m.def("supports_int8", SupportsInt8);
m.def("supports_vnni", SupportsVNNI);
m.def("op_supported_infos", OpSupportedInfos); m.def("op_supported_infos", OpSupportedInfos);
m.def("is_compiled_with_brpc", IsCompiledWithBrpc); m.def("is_compiled_with_brpc", IsCompiledWithBrpc);
m.def("is_compiled_with_dist", IsCompiledWithDIST); m.def("is_compiled_with_dist", IsCompiledWithDIST);
......
...@@ -23,13 +23,12 @@ from paddle.fluid.tests.unittests.test_conv2d_op import conv2d_forward_naive, Te ...@@ -23,13 +23,12 @@ from paddle.fluid.tests.unittests.test_conv2d_op import conv2d_forward_naive, Te
def conv2d_forward_refer(input, filter, group, conv_param): def conv2d_forward_refer(input, filter, group, conv_param):
out, in_n, out_h, out_w, out_c = conv2d_forward_naive(input, filter, group, out, _, _, _, _ = conv2d_forward_naive(input, filter, group, conv_param)
conv_param)
return out return out
@unittest.skipIf(not core.supports_bfloat16(), @unittest.skipIf(not core.supports_int8(),
"place does not support BF16 evaluation") "place does not support int8 computation")
class TestConv2DInt8Op(TestConv2DOp): class TestConv2DInt8Op(TestConv2DOp):
def setUp(self): def setUp(self):
self.op_type = "conv2d" self.op_type = "conv2d"
...@@ -53,73 +52,61 @@ class TestConv2DInt8Op(TestConv2DOp): ...@@ -53,73 +52,61 @@ class TestConv2DInt8Op(TestConv2DOp):
'pad': self.pad, 'pad': self.pad,
'dilation': self.dilations 'dilation': self.dilations
} }
# This implementation of convolution quantization is based on OneDNN documentation
# https://oneapi-src.github.io/oneDNN/dev_guide_int8_computations.html#doxid-dev-guide-int8-computations-1dg-i8-comp-s11
scale_output_shift = (self.scale_out /
(self.scale_in * self.scale_weights[0]))
filter = np.random.random(self.filter_size).astype(self.weighttype) filter = np.random.random(self.filter_size).astype(self.weighttype)
if self.srctype == np.uint8:
input = np.random.randint(0, 10,
self.input_size).astype(self.srctype)
else:
input = np.random.randint(-5, 5,
self.input_size).astype(self.srctype)
input_shift = (np.ones(self.input_size) * 128).astype(np.uint8)
if self.srctype == np.int8: # When the Intel AVX2 or Intel AVX512 Instruction Set is used
# the reorder additionally scales the weights by 0.5
# to overcome the potential overflow issue. If the processor supports VNNI instructions,
# modification of the weights is not necessary.
avx_scale = 0.5 if not core.supports_vnni(
) and self.srctype == np.int8 else 1.
filter_int = np.round(filter * self.scale_weights[0] * filter_int = np.round(filter * self.scale_weights[0] *
0.5).astype(np.int32) avx_scale).astype(np.int32)
scale_output_shift = self.scale_out / (self.scale_in * scale_output_shift = scale_output_shift / avx_scale
self.scale_weights[0] * 0.5)
output1 = conv2d_forward_refer( def conv2d_forward_refer_helper(input_):
np.round((input.astype(np.int32) + input_shift) * return conv2d_forward_refer(
self.scale_in).astype(np.int32), filter_int, input_.astype(np.int32), filter_int, self.groups,
self.groups,
conv2d_param).astype(np.float32) * scale_output_shift
output2 = conv2d_forward_refer(
np.round((input_shift) * self.scale_in).astype(np.int32),
filter_int, self.groups,
conv2d_param).astype(np.float32) * scale_output_shift conv2d_param).astype(np.float32) * scale_output_shift
if self.fuse_residual:
input_residual = np.random.randint(
-5, 5, self.input_residual_size).astype(self.srctype)
output_tmp = np.round(output1 - output2 + input_residual.astype(
self.srctype) * (self.scale_out / self.scale_in_eltwise))
if self.fuse_activation == "relu":
output = np.maximum(output_tmp, 0).astype(self.dsttype)
else:
output = output_tmp.astype(self.dsttype)
else:
if self.fuse_activation == "relu":
output = np.maximum(np.round(output1 - output2),
0).astype(self.dsttype)
else:
output = np.round(output1 - output2).astype(self.dsttype)
def residual_helper(init_low, init_high, output_):
input_residual_ = np.random.randint(
init_low, init_high,
self.input_residual_size).astype(self.srctype)
return (output_ + input_residual_ *
(self.scale_out / self.scale_in_eltwise)), input_residual_
if self.srctype == np.int8:
init_low, init_high = (-5, 5)
input = np.random.randint(init_low, init_high,
self.input_size).astype(self.srctype)
input_shift = (np.ones(self.input_size) * 128).astype(np.uint8)
output1 = conv2d_forward_refer_helper(
np.round(input + input_shift).astype(np.int32))
output2 = conv2d_forward_refer_helper(
np.round(input_shift).astype(np.int32))
output = output1 - output2
else: else:
filter_int = np.round(filter * init_low, init_high = (0, 10)
self.scale_weights[0]).astype(np.int32) input = np.random.randint(init_low, init_high,
scale_output_shift = self.scale_out / (self.scale_in * self.input_size).astype(self.srctype)
self.scale_weights[0]) output = conv2d_forward_refer_helper(input)
output1 = conv2d_forward_refer(
input.astype(np.int32), filter_int, self.groups,
conv2d_param).astype(np.float32)
output1_tmp = np.round(output1 * (
self.scale_out / (self.scale_in * self.scale_weights[0])))
if self.fuse_residual: if self.fuse_residual:
input_residual = np.random.randint( output, input_residual = residual_helper(init_low, init_high,
0, 10, self.input_residual_size).astype(self.srctype) output)
output_tmp_res = np.round(output1 * (self.scale_out / (
self.scale_in * self.scale_weights[ output = np.round(output)
0])) + input_residual.astype(np.int32) * (
self.scale_out / self.scale_in_eltwise))
if self.fuse_activation == "relu":
output = np.maximum(output_tmp_res, 0).astype(self.dsttype)
else:
output = output_tmp_res.astype(self.dsttype)
else:
if self.fuse_activation == "relu": if self.fuse_activation == "relu":
output = np.maximum(output1_tmp, 0).astype(self.dsttype) output = np.maximum(output, 0)
else:
output = output1_tmp.astype(self.dsttype) output = output.astype(self.dsttype)
self.inputs = { self.inputs = {
'Input': 'Input':
...@@ -169,7 +156,7 @@ class TestConv2DInt8Op(TestConv2DOp): ...@@ -169,7 +156,7 @@ class TestConv2DInt8Op(TestConv2DOp):
f_c = self.input_size[1] // self.groups f_c = self.input_size[1] // self.groups
self.input_residual_size = [1, 2, 3, 3] self.input_residual_size = [1, 2, 3, 3]
self.filter_size = [2, f_c, 3, 3] self.filter_size = [2, f_c, 3, 3]
self.scale_in = 1.0 self.scale_in = 0.95
self.scale_out = 0.5 self.scale_out = 0.5
self.scale_weights = [10.0] self.scale_weights = [10.0]
self.scale_in_eltwise = 0.6 self.scale_in_eltwise = 0.6
...@@ -185,7 +172,7 @@ class TestConv2DInt8Op(TestConv2DOp): ...@@ -185,7 +172,7 @@ class TestConv2DInt8Op(TestConv2DOp):
self.fuse_residual = True self.fuse_residual = True
#--------------------test conv2d u8 in and u8 out with residual fuse-------------------- # --------------------test conv2d u8 in and u8 out with residual fuse--------------------
class TestConv2D(TestConv2DInt8Op): class TestConv2D(TestConv2DInt8Op):
...@@ -197,7 +184,7 @@ class TestConv2D(TestConv2DInt8Op): ...@@ -197,7 +184,7 @@ class TestConv2D(TestConv2DInt8Op):
assert np.mod(self.input_size[1], self.groups) == 0 assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups f_c = self.input_size[1] // self.groups
self.filter_size = [6, f_c, 3, 3] self.filter_size = [6, f_c, 3, 3]
self.scale_in = 1.0 self.scale_in = 0.95
self.scale_out = 0.5 self.scale_out = 0.5
self.scale_weights = [10.0] self.scale_weights = [10.0]
self.scale_in_eltwise = 0.6 self.scale_in_eltwise = 0.6
...@@ -224,7 +211,7 @@ class TestWithStride(TestConv2DInt8Op): ...@@ -224,7 +211,7 @@ class TestWithStride(TestConv2DInt8Op):
assert np.mod(self.input_size[1], self.groups) == 0 assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups f_c = self.input_size[1] // self.groups
self.filter_size = [6, f_c, 3, 3] self.filter_size = [6, f_c, 3, 3]
self.scale_in = 1.0 self.scale_in = 0.95
self.scale_out = 0.8 self.scale_out = 0.8
self.scale_weights = [10.0] self.scale_weights = [10.0]
self.scale_in_eltwise = 0.5 self.scale_in_eltwise = 0.5
...@@ -240,7 +227,7 @@ class TestWithDilations(TestConv2DInt8Op): ...@@ -240,7 +227,7 @@ class TestWithDilations(TestConv2DInt8Op):
assert np.mod(self.input_size[1], self.groups) == 0 assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups f_c = self.input_size[1] // self.groups
self.filter_size = [6, f_c, 3, 3] self.filter_size = [6, f_c, 3, 3]
self.scale_in = 1.0 self.scale_in = 0.95
self.scale_out = 0.8 self.scale_out = 0.8
self.scale_weights = [10.0] self.scale_weights = [10.0]
self.scale_in_eltwise = 0.5 self.scale_in_eltwise = 0.5
...@@ -255,7 +242,7 @@ class TestWith1x1(TestConv2DInt8Op): ...@@ -255,7 +242,7 @@ class TestWith1x1(TestConv2DInt8Op):
assert np.mod(self.input_size[1], self.groups) == 0 assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups f_c = self.input_size[1] // self.groups
self.filter_size = [6, f_c, 1, 1] self.filter_size = [6, f_c, 1, 1]
self.scale_in = 1.0 self.scale_in = 0.95
self.scale_out = 0.5 self.scale_out = 0.5
self.scale_weights = [12.0] self.scale_weights = [12.0]
self.scale_in_eltwise = 0.5 self.scale_in_eltwise = 0.5
...@@ -270,7 +257,7 @@ class TestWithInput1x1Filter1x1(TestConv2DInt8Op): ...@@ -270,7 +257,7 @@ class TestWithInput1x1Filter1x1(TestConv2DInt8Op):
assert np.mod(self.input_size[1], self.groups) == 0 assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups f_c = self.input_size[1] // self.groups
self.filter_size = [6, f_c, 1, 1] self.filter_size = [6, f_c, 1, 1]
self.scale_in = 1.0 self.scale_in = 0.95
self.scale_out = 0.5 self.scale_out = 0.5
self.scale_weights = [10.0] self.scale_weights = [10.0]
self.scale_in_eltwise = 0.8 self.scale_in_eltwise = 0.8
...@@ -290,32 +277,32 @@ def init_data_type_with_fusion(self, input_dt, fuse_activation, fuse_residual): ...@@ -290,32 +277,32 @@ def init_data_type_with_fusion(self, input_dt, fuse_activation, fuse_residual):
def create_test_int8_class(parent): def create_test_int8_class(parent):
#--------------------test conv2d s8 in and u8 out-------------------- # --------------------test conv2d s8 in and u8 out--------------------
class TestS8U8Case(parent): class TestS8U8Case(parent):
def init_data_type(self): def init_data_type(self):
init_data_type_with_fusion(self, np.int8, "relu", False) init_data_type_with_fusion(self, np.int8, "relu", False)
#--------------------test conv2d s8 in and s8 out-------------------- # --------------------test conv2d s8 in and s8 out--------------------
class TestS8S8Case(parent): class TestS8S8Case(parent):
def init_data_type(self): def init_data_type(self):
init_data_type_with_fusion(self, np.int8, "", False) init_data_type_with_fusion(self, np.int8, "", False)
#--------------------test conv2d u8 in and s8 out-------------------- # --------------------test conv2d u8 in and s8 out--------------------
class TestU8S8Case(parent): class TestU8S8Case(parent):
def init_data_type(self): def init_data_type(self):
init_data_type_with_fusion(self, np.uint8, "", False) init_data_type_with_fusion(self, np.uint8, "", False)
#--------------------test conv2d u8 in and u8 out without residual fuse-------------------- # --------------------test conv2d u8 in and u8 out without residual fuse--------------------
class TestU8U8Case(parent): class TestU8U8Case(parent):
def init_data_type(self): def init_data_type(self):
init_data_type_with_fusion(self, np.uint8, "relu", False) init_data_type_with_fusion(self, np.uint8, "relu", False)
#--------------------test conv2d s8 in and s8 out with residual fuse-------------------- # --------------------test conv2d s8 in and s8 out with residual fuse--------------------
class TestS8S8ResCase(parent): class TestS8S8ResCase(parent):
def init_data_type(self): def init_data_type(self):
init_data_type_with_fusion(self, np.int8, "", True) init_data_type_with_fusion(self, np.int8, "", True)
#--------------------test conv2d u8 in and s8 out with residual fuse-------------------- # --------------------test conv2d u8 in and s8 out with residual fuse--------------------
class TestU8S8ResCase(parent): class TestU8S8ResCase(parent):
def init_data_type(self): def init_data_type(self):
init_data_type_with_fusion(self, np.uint8, "", True) init_data_type_with_fusion(self, np.uint8, "", True)
...@@ -333,9 +320,9 @@ def create_test_int8_class(parent): ...@@ -333,9 +320,9 @@ def create_test_int8_class(parent):
TestS8S8Case.__name__ = cls_name_s8s8 TestS8S8Case.__name__ = cls_name_s8s8
TestU8S8Case.__name__ = cls_name_u8s8 TestU8S8Case.__name__ = cls_name_u8s8
TestU8U8Case.__name__ = cls_name_u8u8 TestU8U8Case.__name__ = cls_name_u8u8
TestS8S8ResCase.__name__ = cls_name_s8s8_re_1 TestS8S8ResCase.__name__ = cls_name_s8s8_re_1
TestU8S8ResCase.__name__ = cls_name_u8s8_re_1 TestU8S8ResCase.__name__ = cls_name_u8s8_re_1
globals()[cls_name_s8u8] = TestS8U8Case globals()[cls_name_s8u8] = TestS8U8Case
globals()[cls_name_s8s8] = TestS8S8Case globals()[cls_name_s8s8] = TestS8S8Case
globals()[cls_name_u8s8] = TestU8S8Case globals()[cls_name_u8s8] = TestU8S8Case
...@@ -344,7 +331,7 @@ def create_test_int8_class(parent): ...@@ -344,7 +331,7 @@ def create_test_int8_class(parent):
globals()[cls_name_u8s8_re_1] = TestU8S8ResCase globals()[cls_name_u8s8_re_1] = TestU8S8ResCase
if os.name != 'nt': if os.name != 'nt':
#--------------------test conv2d s8 in and u8 out with residual fuse-------------------- # --------------------test conv2d s8 in and u8 out with residual fuse--------------------
class TestS8U8ResCase(parent): class TestS8U8ResCase(parent):
def init_data_type(self): def init_data_type(self):
init_data_type_with_fusion(self, np.int8, "relu", True) init_data_type_with_fusion(self, np.int8, "relu", True)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册