Unverified commit a4c3e038, authored by joanna.wozna.intel, committed via GitHub

Correct conv2d int8 mkldnn UT (#36711)

* Refactor conv2d int8 unit test

* Correct according to review and add int8 check
Parent e11ecfce
@@ -226,6 +226,23 @@ bool SupportsBfloat16FastPerformance() {
#endif
}
bool SupportsInt8() {
#ifndef PADDLE_WITH_MKLDNN
return false;
#else
return (platform::MayIUse(platform::cpu_isa_t::avx2) ||
platform::MayIUse(platform::cpu_isa_t::avx512f));
#endif
}
bool SupportsVNNI() {
#ifndef PADDLE_WITH_MKLDNN
return false;
#else
return platform::MayIUse(platform::cpu_isa_t::avx512_core_vnni);
#endif
}
// According to the input `place` and `dtype`, this function returns a tuple
// consists of three sets:
// 1) All operators registered in the Paddle framework.
@@ -2121,6 +2138,8 @@ All parameter, weight, gradient are variables in Paddle.
m.def("_is_compiled_with_heterps", IsCompiledWithHETERPS);
m.def("supports_bfloat16", SupportsBfloat16);
m.def("supports_bfloat16_fast_performance", SupportsBfloat16FastPerformance);
m.def("supports_int8", SupportsInt8);
m.def("supports_vnni", SupportsVNNI);
m.def("op_supported_infos", OpSupportedInfos);
m.def("is_compiled_with_brpc", IsCompiledWithBrpc);
m.def("is_compiled_with_dist", IsCompiledWithDIST);
@@ -23,13 +23,12 @@ from paddle.fluid.tests.unittests.test_conv2d_op import conv2d_forward_naive, TestConv2DOp
def conv2d_forward_refer(input, filter, group, conv_param):
out, in_n, out_h, out_w, out_c = conv2d_forward_naive(input, filter, group,
conv_param)
out, _, _, _, _ = conv2d_forward_naive(input, filter, group, conv_param)
return out
@unittest.skipIf(not core.supports_bfloat16(),
"place does not support BF16 evaluation")
@unittest.skipIf(not core.supports_int8(),
"place does not support int8 computation")
class TestConv2DInt8Op(TestConv2DOp):
def setUp(self):
self.op_type = "conv2d"
@@ -53,73 +52,61 @@ class TestConv2DInt8Op(TestConv2DOp):
'pad': self.pad,
'dilation': self.dilations
}
# This implementation of convolution quantization is based on OneDNN documentation
# https://oneapi-src.github.io/oneDNN/dev_guide_int8_computations.html#doxid-dev-guide-int8-computations-1dg-i8-comp-s11
scale_output_shift = (self.scale_out /
(self.scale_in * self.scale_weights[0]))
filter = np.random.random(self.filter_size).astype(self.weighttype)
if self.srctype == np.uint8:
input = np.random.randint(0, 10,
# When the Intel AVX2 or Intel AVX512 Instruction Set is used
# the reorder additionally scales the weights by 0.5
# to overcome the potential overflow issue. If the processor supports VNNI instructions,
# modification of the weights is not necessary.
avx_scale = 0.5 if not core.supports_vnni(
) and self.srctype == np.int8 else 1.
filter_int = np.round(filter * self.scale_weights[0] *
avx_scale).astype(np.int32)
scale_output_shift = scale_output_shift / avx_scale
def conv2d_forward_refer_helper(input_):
return conv2d_forward_refer(
input_.astype(np.int32), filter_int, self.groups,
conv2d_param).astype(np.float32) * scale_output_shift
def residual_helper(init_low, init_high, output_):
input_residual_ = np.random.randint(
init_low, init_high,
self.input_residual_size).astype(self.srctype)
return (output_ + input_residual_ *
(self.scale_out / self.scale_in_eltwise)), input_residual_
if self.srctype == np.int8:
init_low, init_high = (-5, 5)
input = np.random.randint(init_low, init_high,
self.input_size).astype(self.srctype)
input_shift = (np.ones(self.input_size) * 128).astype(np.uint8)
output1 = conv2d_forward_refer_helper(
np.round(input + input_shift).astype(np.int32))
output2 = conv2d_forward_refer_helper(
np.round(input_shift).astype(np.int32))
output = output1 - output2
else:
input = np.random.randint(-5, 5,
init_low, init_high = (0, 10)
input = np.random.randint(init_low, init_high,
self.input_size).astype(self.srctype)
input_shift = (np.ones(self.input_size) * 128).astype(np.uint8)
output = conv2d_forward_refer_helper(input)
if self.srctype == np.int8:
filter_int = np.round(filter * self.scale_weights[0] *
0.5).astype(np.int32)
scale_output_shift = self.scale_out / (self.scale_in *
self.scale_weights[0] * 0.5)
output1 = conv2d_forward_refer(
np.round((input.astype(np.int32) + input_shift) *
self.scale_in).astype(np.int32), filter_int,
self.groups,
conv2d_param).astype(np.float32) * scale_output_shift
output2 = conv2d_forward_refer(
np.round((input_shift) * self.scale_in).astype(np.int32),
filter_int, self.groups,
conv2d_param).astype(np.float32) * scale_output_shift
if self.fuse_residual:
input_residual = np.random.randint(
-5, 5, self.input_residual_size).astype(self.srctype)
output_tmp = np.round(output1 - output2 + input_residual.astype(
self.srctype) * (self.scale_out / self.scale_in_eltwise))
if self.fuse_activation == "relu":
output = np.maximum(output_tmp, 0).astype(self.dsttype)
else:
output = output_tmp.astype(self.dsttype)
else:
if self.fuse_activation == "relu":
output = np.maximum(np.round(output1 - output2),
0).astype(self.dsttype)
else:
output = np.round(output1 - output2).astype(self.dsttype)
if self.fuse_residual:
output, input_residual = residual_helper(init_low, init_high,
output)
else:
filter_int = np.round(filter *
self.scale_weights[0]).astype(np.int32)
scale_output_shift = self.scale_out / (self.scale_in *
self.scale_weights[0])
output1 = conv2d_forward_refer(
input.astype(np.int32), filter_int, self.groups,
conv2d_param).astype(np.float32)
output1_tmp = np.round(output1 * (
self.scale_out / (self.scale_in * self.scale_weights[0])))
if self.fuse_residual:
input_residual = np.random.randint(
0, 10, self.input_residual_size).astype(self.srctype)
output_tmp_res = np.round(output1 * (self.scale_out / (
self.scale_in * self.scale_weights[
0])) + input_residual.astype(np.int32) * (
self.scale_out / self.scale_in_eltwise))
if self.fuse_activation == "relu":
output = np.maximum(output_tmp_res, 0).astype(self.dsttype)
else:
output = output_tmp_res.astype(self.dsttype)
else:
if self.fuse_activation == "relu":
output = np.maximum(output1_tmp, 0).astype(self.dsttype)
else:
output = output1_tmp.astype(self.dsttype)
output = np.round(output)
if self.fuse_activation == "relu":
output = np.maximum(output, 0)
output = output.astype(self.dsttype)
self.inputs = {
'Input':
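To unpack the refactored reference computation above: for s8 inputs the test shifts the data by 128 into the u8 range and relies on the linearity of convolution, conv(x) = conv(x + 128) - conv(128); on CPUs without VNNI the weights are additionally pre-scaled by avx_scale = 0.5 and the output scale is divided by the same factor to compensate. A minimal numpy sketch of the shift identity, using a toy dot product in place of a real convolution (values are arbitrary):

```python
import numpy as np

# Stand-ins for one s8 input window and the int8 weights of a filter.
np.random.seed(0)
x = np.random.randint(-5, 5, size=16).astype(np.int32)
w = np.random.randint(-3, 3, size=16).astype(np.int32)
shift = np.full_like(x, 128)

direct = np.dot(x, w)                              # conv(x)
shifted = np.dot(x + shift, w) - np.dot(shift, w)  # conv(x + 128) - conv(128)

# Linearity makes both paths agree, which is what lets the test validate
# s8 inputs through two u8-domain convolutions.
assert direct == shifted
```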
@@ -169,7 +156,7 @@ class TestConv2DInt8Op(TestConv2DOp):
f_c = self.input_size[1] // self.groups
self.input_residual_size = [1, 2, 3, 3]
self.filter_size = [2, f_c, 3, 3]
self.scale_in = 1.0
self.scale_in = 0.95
self.scale_out = 0.5
self.scale_weights = [10.0]
self.scale_in_eltwise = 0.6
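As a quick sanity check of the scales set above (scale_in = 0.95, scale_out = 0.5, scale_weights = [10.0]), the combined factor applied to the integer reference output works out as follows; the avx_scale division only applies on CPUs without VNNI, as described in the setUp comments:

```python
scale_in, scale_out, scale_weight = 0.95, 0.5, 10.0

# Combined output scale used by the reference path.
scale_output_shift = scale_out / (scale_in * scale_weight)  # ~0.0526

# On non-VNNI CPUs the weights were pre-scaled by 0.5, so the output
# scale is divided by the same avx_scale to compensate.
avx_scale = 0.5
print(scale_output_shift / avx_scale)                       # ~0.1053
```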
@@ -185,7 +172,7 @@ class TestConv2DInt8Op(TestConv2DOp):
self.fuse_residual = True
#--------------------test conv2d u8 in and u8 out with residual fuse--------------------
# --------------------test conv2d u8 in and u8 out with residual fuse--------------------
class TestConv2D(TestConv2DInt8Op):
@@ -197,7 +184,7 @@ class TestConv2D(TestConv2DInt8Op):
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [6, f_c, 3, 3]
self.scale_in = 1.0
self.scale_in = 0.95
self.scale_out = 0.5
self.scale_weights = [10.0]
self.scale_in_eltwise = 0.6
@@ -224,7 +211,7 @@ class TestWithStride(TestConv2DInt8Op):
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [6, f_c, 3, 3]
self.scale_in = 1.0
self.scale_in = 0.95
self.scale_out = 0.8
self.scale_weights = [10.0]
self.scale_in_eltwise = 0.5
@@ -240,7 +227,7 @@ class TestWithDilations(TestConv2DInt8Op):
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [6, f_c, 3, 3]
self.scale_in = 1.0
self.scale_in = 0.95
self.scale_out = 0.8
self.scale_weights = [10.0]
self.scale_in_eltwise = 0.5
@@ -255,7 +242,7 @@ class TestWith1x1(TestConv2DInt8Op):
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [6, f_c, 1, 1]
self.scale_in = 1.0
self.scale_in = 0.95
self.scale_out = 0.5
self.scale_weights = [12.0]
self.scale_in_eltwise = 0.5
@@ -270,7 +257,7 @@ class TestWithInput1x1Filter1x1(TestConv2DInt8Op):
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [6, f_c, 1, 1]
self.scale_in = 1.0
self.scale_in = 0.95
self.scale_out = 0.5
self.scale_weights = [10.0]
self.scale_in_eltwise = 0.8
@@ -290,32 +277,32 @@ def init_data_type_with_fusion(self, input_dt, fuse_activation, fuse_residual):
def create_test_int8_class(parent):
#--------------------test conv2d s8 in and u8 out--------------------
# --------------------test conv2d s8 in and u8 out--------------------
class TestS8U8Case(parent):
def init_data_type(self):
init_data_type_with_fusion(self, np.int8, "relu", False)
#--------------------test conv2d s8 in and s8 out--------------------
# --------------------test conv2d s8 in and s8 out--------------------
class TestS8S8Case(parent):
def init_data_type(self):
init_data_type_with_fusion(self, np.int8, "", False)
#--------------------test conv2d u8 in and s8 out--------------------
# --------------------test conv2d u8 in and s8 out--------------------
class TestU8S8Case(parent):
def init_data_type(self):
init_data_type_with_fusion(self, np.uint8, "", False)
#--------------------test conv2d u8 in and u8 out without residual fuse--------------------
# --------------------test conv2d u8 in and u8 out without residual fuse--------------------
class TestU8U8Case(parent):
def init_data_type(self):
init_data_type_with_fusion(self, np.uint8, "relu", False)
#--------------------test conv2d s8 in and s8 out with residual fuse--------------------
# --------------------test conv2d s8 in and s8 out with residual fuse--------------------
class TestS8S8ResCase(parent):
def init_data_type(self):
init_data_type_with_fusion(self, np.int8, "", True)
#--------------------test conv2d u8 in and s8 out with residual fuse--------------------
# --------------------test conv2d u8 in and s8 out with residual fuse--------------------
class TestU8S8ResCase(parent):
def init_data_type(self):
init_data_type_with_fusion(self, np.uint8, "", True)
@@ -333,9 +320,9 @@ def create_test_int8_class(parent):
TestS8S8Case.__name__ = cls_name_s8s8
TestU8S8Case.__name__ = cls_name_u8s8
TestU8U8Case.__name__ = cls_name_u8u8
TestS8S8ResCase.__name__ = cls_name_s8s8_re_1
TestU8S8ResCase.__name__ = cls_name_u8s8_re_1
globals()[cls_name_s8u8] = TestS8U8Case
globals()[cls_name_s8s8] = TestS8S8Case
globals()[cls_name_u8s8] = TestU8S8Case
@@ -344,7 +331,7 @@ def create_test_int8_class(parent):
globals()[cls_name_u8s8_re_1] = TestU8S8ResCase
if os.name != 'nt':
#--------------------test conv2d s8 in and u8 out with residual fuse--------------------
# --------------------test conv2d s8 in and u8 out with residual fuse--------------------
class TestS8U8ResCase(parent):
def init_data_type(self):
init_data_type_with_fusion(self, np.int8, "relu", True)
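For readers unfamiliar with the pattern used in create_test_int8_class above: coverage is multiplied by generating one subclass per (source dtype, activation, residual-fusion) combination, giving each a unique __name__ and registering it in globals() so the unittest loader discovers it. A condensed, hypothetical sketch of the same technique (not the exact helper from this file):

```python
import unittest

import numpy as np


class TestConv2DInt8Base(unittest.TestCase):
    # Placeholder for the real TestConv2DInt8Op; only the config fields matter.
    srctype = np.uint8
    fuse_activation = ""
    fuse_residual = False

    def test_config_is_set(self):
        self.assertIn(self.srctype, (np.int8, np.uint8))


def create_int8_variants(parent):
    # One generated class per configuration, registered under a synthetic
    # name so the unittest loader picks it up.
    configs = [
        (np.int8, "relu", False),   # s8 in, u8 out
        (np.int8, "", False),       # s8 in, s8 out
        (np.uint8, "", False),      # u8 in, s8 out
        (np.uint8, "relu", False),  # u8 in, u8 out
    ]
    for dtype, act, residual in configs:
        name = "{}_{}_{}_{}".format(parent.__name__, dtype.__name__,
                                    act or "none",
                                    "res" if residual else "nores")
        globals()[name] = type(name, (parent,), {
            "srctype": dtype,
            "fuse_activation": act,
            "fuse_residual": residual,
        })


create_int8_variants(TestConv2DInt8Base)

if __name__ == '__main__':
    unittest.main()
```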