Unverified commit 2c444dfa authored by Sławomir Siwek, committed by GitHub

Replace matmul with matmul_v2 during oneDNN fuse passes (#49108)

* Replace matmul with matmul_v2 in fuse passes

* Remove fusion logic from matmul

* Remove fusion methods

* Add proper name

* Adjust namespaces
Parent 958b9f07
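
All five fuse-pass changes below repeat a single rewrite: retype the matched matmul node to matmul_v2, map the legacy transpose_X/transpose_Y attributes onto matmul_v2's trans_x/trans_y, and carry alpha over only when it differs from the default 1.0f. A minimal sketch of that shared pattern, factored into a hypothetical helper (the passes themselves inline it at each fusion site):

#include "paddle/fluid/framework/op_desc.h"

// Hypothetical helper illustrating the rewrite each pass performs inline.
void ConvertMatmulToMatmulV2(paddle::framework::OpDesc *matmul_op) {
  matmul_op->SetType("matmul_v2");
  // matmul_v2 names its transpose flags trans_x/trans_y.
  matmul_op->SetAttr("trans_x", matmul_op->GetAttr("transpose_X"));
  matmul_op->SetAttr("trans_y", matmul_op->GetAttr("transpose_Y"));
  // Keep alpha only when it actually scales the output.
  auto matmul_alpha = matmul_op->GetAttrIfExists<float>("alpha");
  if (matmul_alpha != 1.0f) {
    matmul_op->SetAttr("alpha", matmul_alpha);
  }
}

Since matmul_v2 defines no alpha attribute of its own, the copied value survives only as a oneDNN extra attribute, which is why the extra-attribute table further down registers {"alpha", ExtraAttrProperty::ONEDNN}.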
......@@ -77,6 +77,16 @@ void MatmulActivationMkldnnFusePass::FuseMatmulAct(
? "gelu_tanh"
: "gelu_erf";
}
if (matmul_type == "matmul") {
matmul_op->SetType("matmul_v2");
matmul_op->SetAttr("trans_x", matmul_op->GetAttr("transpose_X"));
matmul_op->SetAttr("trans_y", matmul_op->GetAttr("transpose_Y"));
auto matmul_alpha = matmul_op->GetAttrIfExists<float>("alpha");
if (matmul_alpha != 1.0f) {
matmul_op->SetAttr("alpha", matmul_alpha);
}
}
matmul_op->SetAttr("fuse_activation", act_type);
matmul_op->SetOutput("Out", {activation_out->Name()});
......
......@@ -65,6 +65,16 @@ void MatmulElementwiseAddMKLDNNFusePass::FuseMatmulElementwiseAdd(
return;
}
if (matmul_type == "matmul") {
matmul->Op()->SetType("matmul_v2");
matmul->Op()->SetAttr("trans_x", matmul->Op()->GetAttr("transpose_X"));
matmul->Op()->SetAttr("trans_y", matmul->Op()->GetAttr("transpose_Y"));
auto matmul_alpha = matmul->Op()->GetAttrIfExists<float>("alpha");
if (matmul_alpha != 1.0f) {
matmul->Op()->SetAttr("alpha", matmul_alpha);
}
}
matmul->Op()->SetInput("ResidualData", {elementwise_addend->Name()});
matmul->Op()->SetOutput("Out", {elementwise_add_out->Name()});
......
......@@ -84,6 +84,15 @@ void MatmulTransposeReshapeMKLDNNPass::Fuse(
}
OpDesc *matmul_desc = matmul_op->Op();
if (matmul_type == "matmul") {
matmul_desc->SetType("matmul_v2");
matmul_desc->SetAttr("trans_x", matmul_desc->GetAttr("transpose_X"));
matmul_desc->SetAttr("trans_y", matmul_desc->GetAttr("transpose_Y"));
auto matmul_alpha = matmul_desc->GetAttrIfExists<float>("alpha");
if (matmul_alpha != 1.0f) {
matmul_desc->SetAttr("alpha", matmul_alpha);
}
}
matmul_desc->SetOutput("Out", {reshape_out->Name()});
matmul_desc->SetAttr("fused_reshape_Out", reshape_shape);
matmul_desc->SetAttr("fused_transpose_Out", transpose_axis);
......
......@@ -85,6 +85,17 @@ void FuseOperatorScaleOneDNNPass::FuseScale(Graph *graph,
scale = *(scale_tensor->data<float>());
}
if (op_type == "matmul") {
operator_op->Op()->SetType("matmul_v2");
operator_op->Op()->SetAttr("trans_x",
operator_op->Op()->GetAttr("transpose_X"));
operator_op->Op()->SetAttr("trans_y",
operator_op->Op()->GetAttr("transpose_Y"));
auto matmul_alpha = operator_op->Op()->GetAttrIfExists<float>("alpha");
if (matmul_alpha != 1.0f) {
operator_op->Op()->SetAttr("alpha", matmul_alpha);
}
}
operator_op->Op()->SetAttr("fused_output_scale", scale);
operator_op->Op()->SetOutput("Out", {scale_out->Name()});
......
......@@ -123,6 +123,15 @@ void ReshapeTransposeMatmulMkldnnFusePass::Fuse(
return;
}
if (matmul_type == "matmul") {
matmul_desc->SetType("matmul_v2");
matmul_desc->SetAttr("trans_x", matmul_desc->GetAttr("transpose_X"));
matmul_desc->SetAttr("trans_y", matmul_desc->GetAttr("transpose_Y"));
auto matmul_alpha = matmul_desc->GetAttrIfExists<float>("alpha");
if (matmul_alpha != 1.0f) {
matmul_desc->SetAttr("alpha", matmul_alpha);
}
}
matmul_desc->SetInput(matmul_input_name, {(reshape_in)->Name()});
matmul_desc->SetAttr("fused_reshape_" + matmul_input_name, reshape_shape);
matmul_desc->SetAttr("fused_transpose_" + matmul_input_name,
......
......@@ -97,7 +97,7 @@ void TestMain(const std::string& op_name, bool with_xshapes) {
int removed = 8;  // 2 * (reshape, reshape_out, transpose, transpose_out)
if (with_xshapes) removed += 2; // transpose_xshape, reshape_xshape
EXPECT_EQ(total_nodes_before - removed, total_nodes_after);
auto* matmul_op_desc = GetOpNodes(graph, op_name).at(0)->Op();
auto* matmul_op_desc = GetOpNodes(graph, "matmul_v2").at(0)->Op();
auto check = [&matmul_op_desc](std::string a) {
std::string shape_str = "fused_reshape_" + a;
......
......@@ -345,26 +345,6 @@ class MatMulGradKernel : public framework::OpKernel<T> {
}
};
framework::DDim GetDimForInput(const framework::InferShapeContext &ctx,
std::string input_name) {
auto shape = ctx.Attrs().Get<std::vector<int>>("fused_reshape_" + input_name);
auto axis =
ctx.Attrs().Get<std::vector<int>>("fused_transpose_" + input_name);
auto dim = ctx.GetInputDim(input_name);
PADDLE_ENFORCE_GT(dim.size(),
0,
platform::errors::InvalidArgument(
"The Input(%s) has not been initialized properly. The "
"shape of Input(%s) = [%s].",
dim));
if (!shape.empty() && !axis.empty()) {
dim = dim.reshape(shape).transpose(axis);
}
return dim;
}
template <typename DeviceContext, typename T>
class MatMulDoubleGradKernel : public framework::OpKernel<T> {
public:
......@@ -579,8 +559,8 @@ class MatMulOp : public framework::OperatorWithKernel {
OP_INOUT_CHECK(context->HasInput("Y"), "Input", "Y", "matmul");
OP_INOUT_CHECK(context->HasOutput("Out"), "Output", "Out", "matmul");
auto dim_x = GetDimForInput(*context, "X");
auto dim_y = GetDimForInput(*context, "Y");
auto dim_x = context->GetInputDim("X");
auto dim_y = context->GetInputDim("Y");
#ifdef PADDLE_WITH_MKLDNN
// (jczaja): For NHWC execution output shape needs
......@@ -681,14 +661,6 @@ class MatMulOp : public framework::OperatorWithKernel {
framework::DDim ddim_out = phi::make_ddim(dim_out);
#ifdef PADDLE_WITH_MKLDNN
auto shape = context->Attrs().Get<std::vector<int>>("fused_reshape_Out");
auto axis = context->Attrs().Get<std::vector<int>>("fused_transpose_Out");
if (!shape.empty() && !axis.empty()) {
ddim_out = ddim_out.transpose(axis).reshape(shape);
}
#endif
context->SetOutputDim("Out", ddim_out);
context->ShareLoD("X", "Out");
}
......
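
For context on the deletions above: GetDimForInput folded a fused reshape+transpose into matmul's shape inference by composing DDim operations, and the fused_reshape_Out/fused_transpose_Out branch did the same for the output; with fusion now retargeting matmul_v2, matmul no longer needs either. A small illustration of the input-side composition, with example shapes borrowed from the 4D oneDNN tests below (the concrete shapes are assumptions for illustration only):

#include <vector>

#include "paddle/phi/core/ddim.h"

// Mirrors the removed dim.reshape(shape).transpose(axis) composition.
phi::DDim FoldedInputDim() {
  std::vector<int> shape = {2, 128, 12, 64};  // e.g. fused_reshape_X
  std::vector<int> axis = {0, 2, 1, 3};       // e.g. fused_transpose_X
  phi::DDim dim = phi::make_ddim({2, 128, 768});
  return dim.reshape(shape).transpose(axis);  // [2, 128, 768] -> [2, 12, 128, 64]
}

Note that the removed output-side branch composed in the opposite order, ddim_out.transpose(axis).reshape(shape), since the fused post-ops apply the transpose before the reshape.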
......@@ -99,7 +99,7 @@ const std::unordered_map<std::string, ExtraAttrPropertySet>
{"fuse_alpha", ExtraAttrProperty::ONEDNN},
{"fuse_beta", ExtraAttrProperty::ONEDNN},
{"fuse_relu", ExtraAttrProperty::ONEDNN},
{"fused_output_scale", ExtraAttrProperty::ONEDNN},
{"alpha", ExtraAttrProperty::ONEDNN},
{"fuse_residual_connection", ExtraAttrProperty::ONEDNN},
{"fuse_with_relu", ExtraAttrProperty::ONEDNN},
{"fused_reshape_Out", ExtraAttrProperty::ONEDNN},
......
......@@ -146,7 +146,7 @@ class TestMatmulActivationMkldnnFusePass(PassAutoScanTest):
'operator_scale_onednn_fuse_pass',
],
)
yield config, ['matmul'], (1e-5, 1e-5)
yield config, ['matmul_v2'], (1e-5, 1e-5)
def test(self):
self.run_and_statis(
......
......@@ -137,7 +137,7 @@ class TestMatmulElementwiseAddActivationMkldnnFusePass(PassAutoScanTest):
'matmul_activation_mkldnn_fuse_pass',
],
)
yield config, ['matmul'], (1e-5, 1e-5)
yield config, ['matmul_v2'], (1e-5, 1e-5)
def test(self):
self.run_and_statis(
......
......@@ -76,7 +76,7 @@ class TestMatmulElementwiseAddMkldnnFusePass(PassAutoScanTest):
config = self.create_inference_config(
use_mkldnn=True, passes=['matmul_elementwise_add_mkldnn_fuse_pass']
)
yield config, ['matmul'], (1e-5, 1e-5)
yield config, ['matmul_v2'], (1e-5, 1e-5)
def test(self):
self.run_and_statis(
......
......@@ -116,7 +116,7 @@ class TestMatmulTransposeReshapeMkldnnFusePass(PassAutoScanTest):
def sample_predictor_configs(self, program_config):
config = self.create_inference_config(use_mkldnn=True)
yield config, ["matmul"], (1e-5, 1e-5)
yield config, ["matmul_v2"], (1e-5, 1e-5)
def test(self):
self.run_and_statis(
......
......@@ -135,17 +135,8 @@ class TestMatmulv2TransposeReshapeMkldnnFusePass(PassAutoScanTest):
return program_config
def sample_predictor_configs(self, program_config):
# gpu_cpu_map_matmul_v2_to_matmul_pass will affect the type of final fused op
fused_op = "matmul_v2"
input1_dim1 = program_config.inputs["input_data1"].shape[0]
input2_dim1 = program_config.inputs["input_data2"].shape[0]
input1_dim2 = program_config.inputs["input_data1"].shape[1]
input2_dim2 = program_config.inputs["input_data2"].shape[1]
if input1_dim1 == input2_dim1 and input1_dim2 == input2_dim2:
fused_op = "matmul"
config = self.create_inference_config(use_mkldnn=True)
yield config, [fused_op], (1e-5, 1e-5)
yield config, ["matmul_v2"], (1e-5, 1e-5)
def test(self):
self.run_and_statis(
......
......@@ -153,7 +153,7 @@ class TestReshapeTransposeMatmulMkldnnFusePass(PassAutoScanTest):
def sample_predictor_configs(self, program_config):
config = self.create_inference_config(use_mkldnn=True)
yield config, ["matmul"], (1e-5, 1e-5)
yield config, ["matmul_v2"], (1e-5, 1e-5)
def test(self):
self.run_and_statis(
......
......@@ -17,7 +17,7 @@ import unittest
import numpy as np
from paddle.fluid.tests.unittests.op_test import OpTest, skip_check_grad_ci
from paddle.fluid.tests.unittests.op_test import OpTest
class TestDnnlMatMulOp(OpTest):
......@@ -254,321 +254,6 @@ class TestDnnlMatMulOpInt8ForceFP32BasicScales(TestDnnlMatMulOp):
self.attrs = {'force_fp32_output': True}
@skip_check_grad_ci(reason="DNNL's MatMul doesn't implement grad kernel.")
class TestReshapeTransposeMatMulOp(OpTest):
def init_data_type(self):
self.data_type_ = 'float32'
def generate_data(self):
self.x = (
np.random.random([2, 128, 768])
.astype("float32")
.reshape([2, 128, 12, 64])
.transpose([0, 2, 1, 3])
)
self.y = (
np.random.random([2, 128, 768])
.astype("float32")
.reshape([2, 128, 12, 64])
.transpose([0, 2, 1, 3])
)
self.out = np.matmul(self.x, self.y.transpose([0, 1, 3, 2]))
self.fused_reshape_X = []
self.fused_transpose_X = []
self.fused_reshape_Y = []
self.fused_transpose_Y = []
def set_op_type_and_transpose_y_name(self):
self.op_type = "matmul"
self.transpose_y_name = "transpose_Y"
def setUp(self):
self.set_op_type_and_transpose_y_name()
self._cpu_only = True
self.use_mkldnn = True
self.transpose_y = True
self.init_data_type()
self.generate_data()
self.inputs = {'X': self.x, 'Y': self.y}
self.attrs = {
'use_mkldnn': self.use_mkldnn,
self.transpose_y_name: self.transpose_y,
}
if len(self.fused_transpose_X) > 0:
self.attrs['fused_transpose_X'] = self.fused_transpose_X
if len(self.fused_transpose_Y) > 0:
self.attrs['fused_transpose_Y'] = self.fused_transpose_Y
if len(self.fused_reshape_X) > 0:
self.attrs['fused_reshape_X'] = self.fused_reshape_X
if len(self.fused_reshape_Y) > 0:
self.attrs['fused_reshape_Y'] = self.fused_reshape_Y
self.outputs = {'Out': self.out}
def test_check_output(self):
self.check_output()
class TestReshapeTransposeMatMulOp4DXFloat(TestReshapeTransposeMatMulOp):
def generate_data(self):
self.x = np.random.random([2, 128, 768]).astype("float32")
self.y = (
np.random.random([2, 128, 768])
.astype("float32")
.reshape([2, 128, 12, 64])
.transpose([0, 2, 1, 3])
)
self.fused_transpose_X = [0, 2, 1, 3]
self.fused_reshape_X = [0, 0, 12, 64]
self.fused_transpose_Y = []
self.fused_reshape_Y = []
self.out = np.matmul(
self.x.reshape([2, 128, 12, 64]).transpose([0, 2, 1, 3]),
self.y.transpose([0, 1, 3, 2]),
)
class TestReshapeTransposeMatMulOp4DXInt8(TestReshapeTransposeMatMulOp4DXFloat):
def init_data_type(self):
self.data_type_ = 'int8'
class TestReshapeTransposeMatMulOp4DYFloat(TestReshapeTransposeMatMulOp):
def generate_data(self):
self.x = (
np.random.random([2, 128, 768])
.astype("float32")
.reshape([2, 128, 12, 64])
.transpose([0, 2, 1, 3])
)
self.y = np.random.random([2, 128, 768]).astype("float32")
self.fused_transpose_X = []
self.fused_reshape_X = []
self.fused_transpose_Y = [0, 2, 1, 3]
self.fused_reshape_Y = [0, 0, 12, 64]
self.out = np.matmul(
self.x, self.y.reshape([2, 128, 12, 64]).transpose([0, 2, 3, 1])
)
class TestReshapeTransposeMatMulOp4DYInt8(TestReshapeTransposeMatMulOp4DYFloat):
def init_data_type(self):
self.data_type_ = 'int8'
class TestReshapeTransposeMatMulOp4DXYFloat(TestReshapeTransposeMatMulOp):
def generate_data(self):
self.x = np.random.random([2, 128, 768]).astype("float32")
self.y = np.random.random([2, 128, 768]).astype("float32")
self.fused_transpose_X = [0, 2, 1, 3]
self.fused_reshape_X = [0, 0, 12, 64]
self.fused_transpose_Y = [0, 2, 1, 3]
self.fused_reshape_Y = [0, 0, 12, 64]
self.out = np.matmul(
self.x.reshape([2, 128, 12, 64]).transpose([0, 2, 1, 3]),
self.y.reshape([2, 128, 12, 64]).transpose([0, 2, 3, 1]),
)
class TestReshapeTransposeMatMulOp4DXYInt8(
TestReshapeTransposeMatMulOp4DXYFloat
):
def init_data_type(self):
self.data_type_ = 'int8'
class TestReshapeTransposeMatMulOp2DXFloat(TestReshapeTransposeMatMulOp):
def generate_data(self):
self.x = np.random.random([2, 5, 10]).astype("float32")
self.y = (
np.random.random([2, 5, 10])
.astype("float32")
.reshape([10, 10])
.transpose([1, 0])
)
self.fused_transpose_X = [1, 0]
self.fused_reshape_X = [10, 10]
self.fused_transpose_Y = []
self.fused_reshape_Y = []
self.out = np.matmul(
self.x.reshape([10, 10]).transpose([1, 0]), self.y.transpose([1, 0])
)
class TestReshapeTransposeMatMulOp2DXInt8(TestReshapeTransposeMatMulOp2DXFloat):
def init_data_type(self):
self.data_type_ = 'int8'
class TestReshapeTransposeMatMulOp2DYFloat(TestReshapeTransposeMatMulOp):
def generate_data(self):
self.x = (
np.random.random([2, 5, 10])
.astype("float32")
.reshape([10, 10])
.transpose([1, 0])
)
self.y = np.random.random([2, 5, 10]).astype("float32")
self.fused_transpose_X = []
self.fused_reshape_X = []
self.fused_transpose_Y = [1, 0]
self.fused_reshape_Y = [10, 10]
self.out = np.matmul(self.x, self.y.reshape([10, 10]))
class TestReshapeTransposeMatMulOp2DYInt8(TestReshapeTransposeMatMulOp2DYFloat):
def init_data_type(self):
self.data_type_ = 'int8'
class TestReshapeTransposeMatMulOp3DXFloat(TestReshapeTransposeMatMulOp):
def generate_data(self):
self.x = np.random.random([2, 2, 5, 5]).astype("float32")
self.y = (
np.random.random([2, 2, 5, 5])
.astype("float32")
.reshape([2, 10, 5])
.transpose([0, 2, 1])
)
self.fused_transpose_X = [0, 2, 1]
self.fused_reshape_X = [2, 10, 5]
self.fused_transpose_Y = []
self.fused_reshape_Y = []
self.out = np.matmul(
self.x.reshape([2, 10, 5]).transpose(0, 2, 1),
self.y.transpose(0, 2, 1),
)
class TestReshapeTransposeMatMulOp3DXInt8(TestReshapeTransposeMatMulOp3DXFloat):
def init_data_type(self):
self.data_type_ = 'int8'
class TestReshapeTransposeMatMulOp3DYFloat(TestReshapeTransposeMatMulOp):
def generate_data(self):
self.x = (
np.random.random([2, 2, 5, 5])
.astype(self.data_type_)
.reshape([2, 10, 5])
.transpose([0, 2, 1])
)
self.y = np.random.random([2, 2, 5, 5]).astype(self.data_type_)
self.fused_transpose_X = []
self.fused_reshape_X = []
self.fused_transpose_Y = [0, 2, 1]
self.fused_reshape_Y = [2, 10, 5]
self.out = np.matmul(self.x, self.y.reshape([2, 10, 5]))
class TestReshapeTransposeMatMulOp3DYInt8(TestReshapeTransposeMatMulOp3DYFloat):
def init_data_type(self):
self.data_type_ = 'int8'
@skip_check_grad_ci(reason="Tests inference only optimization.")
class TestMatMulOpTransposeReshapeEmptyFloat(OpTest):
def init_data_type(self):
self.data_type_ = np.float32
def generate_data(self):
self.bs = 1
self.x = np.random.random([self.bs, 128, 128]).astype(self.data_type_)
self.y = np.random.random([self.bs, 128, 64]).astype(self.data_type_)
def init_params_and_out(self):
self.transpose_out = []
self.reshape_out = []
self.out = np.matmul(self.x, self.y)
def set_op_type(self):
self.op_type = "matmul"
def setUp(self):
self.set_op_type()
self._cpu_only = True
self.use_mkldnn = True
self.init_data_type()
self.generate_data()
self.init_params_and_out()
self.inputs = {'X': self.x, 'Y': self.y}
self.attrs = {'use_mkldnn': self.use_mkldnn}
if len(self.reshape_out) > 0:
self.attrs['fused_reshape_Out'] = self.reshape_out
if len(self.transpose_out) > 0:
self.attrs['fused_transpose_Out'] = self.transpose_out
self.inputs = {'X': self.x, 'Y': self.y}
self.outputs = {'Out': self.out}
def test_check_output(self):
self.check_output()
def check_raise_error(self, msg):
try:
self.check_output()
except Exception as e:
if msg in str(e):
raise AttributeError
else:
print(e)
class TestMatMulOpTransposeReshapeIntEmptyInt(
TestMatMulOpTransposeReshapeEmptyFloat
):
def init_data_type(self):
self.data_type_ = np.int8
class TestMatMulOpTransposeReshapeBasicFloat(
TestMatMulOpTransposeReshapeEmptyFloat
):
def generate_data(self):
self.bs = 8
self.x = np.random.random([self.bs, 12, 128, 128]).astype(
self.data_type_
)
self.y = np.random.random([self.bs, 12, 128, 64]).astype(
self.data_type_
)
def init_params_and_out(self):
self.transpose_out = [0, 2, 1, 3]
self.reshape_out = [0, 0, self.x.shape[1] * self.y.shape[-1]]
self.out = (
np.matmul(self.x, self.y)
.transpose([0, 2, 1, 3])
.reshape([self.bs, -1, self.x.shape[1] * self.y.shape[-1]])
)
class TestMatMulOpTransposeReshapeBasicInt(
TestMatMulOpTransposeReshapeBasicFloat
):
def init_data_type(self):
self.data_type_ = np.int8
class TestMatMulOpTransposeReshapeOtherDimFloat(
TestMatMulOpTransposeReshapeBasicFloat
):
def generate_data(self):
self.bs = 11
self.x = np.random.random([self.bs, 12, 14, 18]).astype(self.data_type_)
self.y = np.random.random([self.bs, 12, 18, 13]).astype(self.data_type_)
class TestMatMulOpTransposeReshapeOtherDimInt(
TestMatMulOpTransposeReshapeOtherDimFloat
):
def init_data_type(self):
self.data_type_ = np.int8
if __name__ == "__main__":
from paddle import enable_static
......