diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc
index 23419d5b9e0a20adcb6245a5a5aa4c5c4b5f3a34..aa0979b4be64ae3ccebbd7cc82abcf4a4712527a 100644
--- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc
+++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc
@@ -19,6 +19,7 @@
 #include <vector>
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/platform/errors.h"
+#include "paddle/fluid/platform/mkldnn_helper.h"
 #include "paddle/fluid/string/pretty_log.h"
 
 namespace paddle {
@@ -54,7 +55,7 @@ void LogQuantizationDisabled(Node* op) {
   std::stringstream msg_ss;
   VLOG(4) << "Qantization skipped for operator " << op->Name()
           << " (type: " << op->Op()->Type() << ", id: " << op->id()
-          << "). Attribute use_quantizer = false.";
+          << "). Attribute mkldnn_data_type != \"int8\".";
 }
 
 }  // namespace
@@ -228,12 +229,12 @@ double CPUQuantizePass::GetScaleValueForNode(const Node* node,
 
 bool CPUQuantizePass::IsOpDequantized(const Node* node) const {
   return node->Op()->Type() == "dequantize" ||
-         node->Op()->GetAttrIfExists<bool>("use_quantizer");
+         platform::HasOpINT8DataType(node->Op());
 }
 
 bool CPUQuantizePass::IsOpQuantized(const Node* node) const {
   return node->Op()->Type() == "quantize" ||
-         node->Op()->GetAttrIfExists<bool>("use_quantizer");
+         platform::HasOpINT8DataType(node->Op());
 }
 
 void CPUQuantizePass::QuantizeConv(Graph* graph,
@@ -248,10 +249,9 @@ void CPUQuantizePass::QuantizeConv(Graph* graph,
                                    Graph* g) {
     VLOG(4) << "Quantize conv2d op";
     GET_IR_NODE_FROM_SUBGRAPH(conv_op, conv_op, conv_pattern);
-    auto* conv_op_desc = conv_op->Op();
 
     // skip if should not be quantized
-    if (!conv_op_desc->GetAttrIfExists<bool>("use_quantizer")) {
+    if (!platform::HasOpINT8DataType(conv_op->Op())) {
       LogQuantizationDisabled(conv_op);
       return;
     }
@@ -353,14 +353,13 @@ void CPUQuantizePass::QuantizeFc(Graph* graph) const {
                                  Graph* g) {
     VLOG(4) << "Quantize fc op";
     GET_IR_NODE_FROM_SUBGRAPH(fc, fc, fc_pattern);
-    auto* fc_op_desc = fc->Op();
 
     // skip if should not be quantized
-    if (!fc_op_desc->GetAttrIfExists<bool>("use_quantizer")) {
+    if (!platform::HasOpINT8DataType(fc->Op())) {
       LogQuantizationDisabled(fc);
       return;
     }
-    if (!fc_op_desc->GetAttrIfExists<bool>("use_mkldnn")) {
+    if (!fc->Op()->GetAttrIfExists<bool>("use_mkldnn")) {
       return;
     }
 
@@ -420,10 +419,9 @@ void CPUQuantizePass::QuantizePool(Graph* graph) const {
                                    Graph* g) {
     VLOG(4) << "Quantize pool2d op";
     GET_IR_NODE_FROM_SUBGRAPH(pool_op, pool_op, pool_pattern);
-    auto* pool_op_desc = pool_op->Op();
 
     // skip if should not be quantized
-    if (!pool_op_desc->GetAttrIfExists<bool>("use_quantizer")) {
+    if (!platform::HasOpINT8DataType(pool_op->Op())) {
       LogQuantizationDisabled(pool_op);
       return;
     }
@@ -465,10 +463,9 @@ void CPUQuantizePass::QuantizeConcat(Graph* graph) const {
                                      Graph* g) {
     VLOG(4) << "Quantize concat op";
     GET_IR_NODE_FROM_SUBGRAPH(concat_op, concat_op, concat_pattern);
-    auto* concat_op_desc = concat_op->Op();
 
     // skip if should not be quantized
-    if (!concat_op_desc->GetAttrIfExists<bool>("use_quantizer")) {
+    if (!platform::HasOpINT8DataType(concat_op->Op())) {
       LogQuantizationDisabled(concat_op);
       return;
     }
@@ -511,10 +508,9 @@ void CPUQuantizePass::QuantizePriorBox(Graph* graph) const {
                                        Graph* g) {
     VLOG(4) << "Quantize prior_box op";
     GET_IR_NODE_FROM_SUBGRAPH(prior_box_op, prior_box_op, prior_box_pattern);
-    auto* prior_box_op_desc = prior_box_op->Op();
 
     // skip if should not be quantized
-    if (!prior_box_op_desc->GetAttrIfExists<bool>("use_quantizer")) {
+    if (!platform::HasOpINT8DataType(prior_box_op->Op())) {
      LogQuantizationDisabled(prior_box_op);
       return;
     }
@@ -554,10 +550,9 @@ void CPUQuantizePass::QuantizeTranspose(Graph* graph) const {
                                         Graph* g) {
     VLOG(4) << "Quantize transpose op";
     GET_IR_NODE_FROM_SUBGRAPH(transpose_op, transpose_op, transpose_pattern);
-    auto* transpose_op_desc = transpose_op->Op();
 
     // skip if should not be quantized
-    if (!transpose_op_desc->GetAttrIfExists<bool>("use_quantizer")) {
+    if (!platform::HasOpINT8DataType(transpose_op->Op())) {
       LogQuantizationDisabled(transpose_op);
       return;
     }
@@ -609,10 +604,9 @@ void CPUQuantizePass::QuantizeReshape(Graph* graph) const {
                                       Graph* g) {
     VLOG(4) << "Quantize reshape op";
     GET_IR_NODE_FROM_SUBGRAPH(reshape_op, reshape_op, reshape_pattern);
-    auto* reshape_op_desc = reshape_op->Op();
 
     // skip if should not be quantized
-    if (!reshape_op_desc->GetAttrIfExists<bool>("use_quantizer")) {
+    if (!platform::HasOpINT8DataType(reshape_op->Op())) {
       LogQuantizationDisabled(reshape_op);
       return;
     }
@@ -662,10 +656,9 @@ void CPUQuantizePass::QuantizeMatmul(Graph* graph) const {
                                      Graph* g) {
     VLOG(4) << "Quantize matmul op";
     GET_IR_NODE_FROM_SUBGRAPH(matmul_op, matmul_op, matmul_pattern);
-    auto* matmul_op_desc = matmul_op->Op();
 
     // skip if should not be quantized
-    if (!matmul_op_desc->GetAttrIfExists<bool>("use_quantizer")) {
+    if (!platform::HasOpINT8DataType(matmul_op->Op())) {
       LogQuantizationDisabled(matmul_op);
       return;
     }
@@ -732,10 +725,9 @@ void CPUQuantizePass::QuantizeElementwiseAdd(Graph* graph) const {
     VLOG(4) << "Quantize elementwise_add op";
     GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_op, elementwise_add_op,
                               elementwise_add_pattern);
-    auto* elementwise_add_op_desc = elementwise_add_op->Op();
 
     // skip if should not be quantized
-    if (!elementwise_add_op_desc->GetAttrIfExists<bool>("use_quantizer")) {
+    if (!platform::HasOpINT8DataType(elementwise_add_op->Op())) {
       LogQuantizationDisabled(elementwise_add_op);
       return;
     }
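Every per-op "should we quantize" check above now goes through platform::HasOpINT8DataType (defined in the mkldnn_helper.h hunk further down) instead of reading the use_quantizer bool directly. Below is a minimal standalone sketch of that predicate's semantics, using a hypothetical FakeOpDesc stand-in rather than Paddle's real OpDesc:

```cpp
// Standalone sketch (not Paddle code): a simplified stand-in for OpDesc with
// typed attribute lookup, plus a HasOpINT8DataType-style predicate that
// honors both the new string attribute and the legacy bool.
#include <cassert>
#include <map>
#include <string>
#include <variant>

class FakeOpDesc {
 public:
  using Attr = std::variant<bool, std::string>;

  void SetAttr(const std::string& name, Attr value) {
    attrs_[name] = std::move(value);
  }

  // Mirrors OpDesc::GetAttrIfExists<T>: returns a default-constructed T when
  // the attribute is absent or holds a different type.
  template <typename T>
  T GetAttrIfExists(const std::string& name) const {
    auto it = attrs_.find(name);
    if (it == attrs_.end()) return T{};
    if (auto* v = std::get_if<T>(&it->second)) return *v;
    return T{};
  }

 private:
  std::map<std::string, Attr> attrs_;
};

bool HasOpINT8DataType(const FakeOpDesc* op) {
  return op->GetAttrIfExists<std::string>("mkldnn_data_type") == "int8" ||
         op->GetAttrIfExists<bool>("use_quantizer");
}

int main() {
  FakeOpDesc fresh;  // neither attribute set -> stays on the fp32 path
  assert(!HasOpINT8DataType(&fresh));

  FakeOpDesc modern;  // new-style model
  modern.SetAttr("mkldnn_data_type", std::string("int8"));
  assert(HasOpINT8DataType(&modern));

  FakeOpDesc legacy;  // old model that only carries use_quantizer
  legacy.SetAttr("use_quantizer", true);
  assert(HasOpINT8DataType(&legacy));
  return 0;
}
```

The important property is the fallback clause: an old model that still carries use_quantizer = true keeps taking the int8 path even though it has no mkldnn_data_type attribute.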
op->SetInput("Bias", {inputs[2]}); op->SetOutput("Out", {outputs[0]}); - op->SetAttr("use_quantizer", use_quantizer); + op->SetAttr("mkldnn_data_type", mkldnn_data_type); op->SetAttr("Scale_in", 1.0f); op->SetAttr("Scale_out", 1.0f); op->SetAttr("Scale_weights", std::vector{1.0f}); } else if (type == "concat") { op->SetInput("X", inputs); op->SetOutput("Out", outputs); - op->SetAttr("use_quantizer", use_quantizer); + op->SetAttr("mkldnn_data_type", mkldnn_data_type); } else if (type == "dequantize") { op->SetInput("Input", {inputs[0]}); op->SetOutput("Output", {outputs[0]}); @@ -79,7 +79,7 @@ void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name, op->SetInput("X", {inputs[0]}); if (inputs.size() > 1) op->SetInput("Y", {inputs[1]}); op->SetOutput("Out", {outputs[0]}); - op->SetAttr("use_quantizer", use_quantizer); + op->SetAttr("mkldnn_data_type", mkldnn_data_type); op->SetAttr("Scale_x", 1.0f); op->SetAttr("Scale_y", 1.0f); op->SetAttr("Scale_out", 1.0f); @@ -87,7 +87,7 @@ void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name, op->SetInput("X", {inputs[0]}); if (inputs.size() > 1) op->SetInput("Y", {inputs[1]}); op->SetOutput("Out", {outputs[0]}); - op->SetAttr("use_quantizer", use_quantizer); + op->SetAttr("mkldnn_data_type", mkldnn_data_type); op->SetAttr("Scale_x", 1.0f); op->SetAttr("Scale_y", 1.0f); op->SetAttr("Scale_out", 1.0f); @@ -142,7 +142,8 @@ static const std::initializer_list variable_names{ // d->Dropout1->g and (g, w5, b3)->Fc1->h and (h,w3,b1,i)->Conv3->j // // (d,w4, b2)->Conv4->i -ProgramDesc BuildProgramDesc(bool use_mkldnn, bool use_quantizer) { +ProgramDesc BuildProgramDesc(bool use_mkldnn, + const std::string& mkldnn_data_type) { ProgramDesc prog; for (auto& v : variable_names) { auto* var = prog.MutableBlock(0)->Var(v); @@ -152,21 +153,21 @@ ProgramDesc BuildProgramDesc(bool use_mkldnn, bool use_quantizer) { } SetOp(&prog, "conv2d", "Conv1", {"a", "w1"}, {"c"}, use_mkldnn, - use_quantizer); - SetOp(&prog, "pool2d", "Pool1", {"c"}, {"d"}, use_mkldnn, use_quantizer); + mkldnn_data_type); + SetOp(&prog, "pool2d", "Pool1", {"c"}, {"d"}, use_mkldnn, mkldnn_data_type); SetOp(&prog, "conv2d", "Conv2", {"d", "w2"}, {"e"}, use_mkldnn, - use_quantizer); - SetOp(&prog, "pool2d", "Pool2", {"e"}, {"f"}, use_mkldnn, use_quantizer); + mkldnn_data_type); + SetOp(&prog, "pool2d", "Pool2", {"e"}, {"f"}, use_mkldnn, mkldnn_data_type); SetOp(&prog, "dropout", "Dropout1", {"d"}, {"g"}, use_mkldnn); SetOp(&prog, "fc", "Fc1", {"g", "w5", "b3"}, {"h"}, use_mkldnn, - use_quantizer); + mkldnn_data_type); SetOp(&prog, "conv2d", "Conv3", {"h", "w3", "b1", "i"}, {"j"}, use_mkldnn, - use_quantizer); + mkldnn_data_type); SetOp(&prog, "conv2d", "Conv4", {"c", "w4", "b2"}, {"i"}, use_mkldnn, - use_quantizer); + mkldnn_data_type); return prog; } @@ -215,7 +216,7 @@ void MainTest(const ProgramDesc& prog, int conv_count, int pool_count, TEST(CpuQuantizePass, quantize) { bool use_mkldnn = true; - bool use_quantizer = true; + std::string mkldnn_data_type = "int8"; // (a->QUANT1->IN1,w1)->Conv1->OUT1->DEQUANT1->c and // c->QUANT2->IN2->Pool1->OUT2->DEQUANT2->d // @@ -228,16 +229,16 @@ TEST(CpuQuantizePass, quantize) { // (d->QUANT7->IN7,w4, b2)->Conv4->DEQUANT6->OUT6->i // Insert nodes: 8 Quant + 8 IN + 7 OUT + 7 DEQUANT int added_nodes = 8 + 8 + 7 + 7; - MainTest(BuildProgramDesc(use_mkldnn, use_quantizer), 4, 2, 8, 7, added_nodes, - 2.0f * 127); + MainTest(BuildProgramDesc(use_mkldnn, mkldnn_data_type), 4, 2, 8, 7, + added_nodes, 2.0f * 127); } 
 
 TEST(CpuQuantizePass, do_not_quantize) {
   bool use_mkldnn = true;
-  bool use_quantizer = false;
+  std::string mkldnn_data_type = "float32";
   int added_nodes = 0;
-  MainTest(BuildProgramDesc(use_mkldnn, use_quantizer), 4, 2, 0, 0, added_nodes,
-           1.0f);
+  MainTest(BuildProgramDesc(use_mkldnn, mkldnn_data_type), 4, 2, 0, 0,
+           added_nodes, 1.0f);
 }
 
 static const std::initializer_list<std::string> variable_names_concat = {
@@ -250,10 +251,10 @@ static const std::initializer_list<std::string> variable_names_concat = {
     "a1", "b1", "a2", "b2", "c", "d"};
 
 ProgramDesc BuildProgramDescConcat() {
   ProgramDesc prog;
-  SetOp(&prog, "pool2d", "Pool1", {"a1"}, {"b1"}, true, false);
-  SetOp(&prog, "pool2d", "Pool2", {"a2"}, {"b2"}, true, false);
-  SetOp(&prog, "concat", "Concat", {"b1", "b2"}, {"c"}, true, true);
-  SetOp(&prog, "pool2d", "Pool3", {"c"}, {"d"}, true, false);
+  SetOp(&prog, "pool2d", "Pool1", {"a1"}, {"b1"}, true, "float32");
+  SetOp(&prog, "pool2d", "Pool2", {"a2"}, {"b2"}, true, "float32");
+  SetOp(&prog, "concat", "Concat", {"b1", "b2"}, {"c"}, true, "int8");
+  SetOp(&prog, "pool2d", "Pool3", {"c"}, {"d"}, true, "float32");
 
   return prog;
 }
@@ -321,11 +322,11 @@ ProgramDesc BuildProgramDescTranspose() {
     }
   }
 
-  SetOp(&prog, "conv2d", "Conv1", {"a", "w1"}, {"b"}, true, true);
-  SetOp(&prog, "transpose2", "Transpose1", {"b"}, {"c"}, true, true);
-  SetOp(&prog, "conv2d", "Conv1", {"c", "w2"}, {"d"}, true, true);
-  SetOp(&prog, "transpose2", "Transpose2", {"d"}, {"e"}, true, true);
-  SetOp(&prog, "dropout", "Dropout", {"e"}, {"f"}, true, false);
+  SetOp(&prog, "conv2d", "Conv1", {"a", "w1"}, {"b"}, true, "int8");
+  SetOp(&prog, "transpose2", "Transpose1", {"b"}, {"c"}, true, "int8");
+  SetOp(&prog, "conv2d", "Conv1", {"c", "w2"}, {"d"}, true, "int8");
+  SetOp(&prog, "transpose2", "Transpose2", {"d"}, {"e"}, true, "int8");
+  SetOp(&prog, "dropout", "Dropout", {"e"}, {"f"}, true, "float32");
 
   return prog;
 }
@@ -400,8 +401,8 @@ ProgramDesc BuildProgramDescReshape() {
     prog.MutableBlock(0)->Var(v);
   }
   SetOp(&prog, "dequantize", "Dequantize1", {"a"}, {"b"}, true);
-  SetOp(&prog, "reshape2", "Reshape2", {"b"}, {"c"}, true, true);
-  SetOp(&prog, "dropout", "Dropout", {"c"}, {"d"}, true, false);
+  SetOp(&prog, "reshape2", "Reshape2", {"b"}, {"c"}, true, "int8");
+  SetOp(&prog, "dropout", "Dropout", {"c"}, {"d"}, true, "float32");
 
   return prog;
 }
@@ -415,9 +416,9 @@ ProgramDesc BuildProgramDescReshapeBetweenNonQuantizedOp() {
     prog.MutableBlock(0)->Var(v);
   }
 
-  SetOp(&prog, "transpose2", "Transpose2", {"a"}, {"b"}, true, false);
-  SetOp(&prog, "reshape2", "Reshape2", {"b"}, {"c"}, true, true);
-  SetOp(&prog, "dropout", "Dropout", {"c"}, {"d"}, true, false);
+  SetOp(&prog, "transpose2", "Transpose2", {"a"}, {"b"}, true, "float32");
+  SetOp(&prog, "reshape2", "Reshape2", {"b"}, {"c"}, true, "int8");
+  SetOp(&prog, "dropout", "Dropout", {"c"}, {"d"}, true, "float32");
 
   return prog;
 }
@@ -505,8 +506,8 @@ ProgramDesc BuildProgramDescMatmul() {
   }
   SetOp(&prog, "dequantize", "Dequantize1", {"a"}, {"b"}, true);
   SetOp(&prog, "dequantize", "Dequantize2", {"c"}, {"d"}, true);
-  SetOp(&prog, "matmul", "Matmul", {"b", "d"}, {"e"}, true, true);
-  SetOp(&prog, "dropout", "Dropout", {"e"}, {"f"}, true, false);
+  SetOp(&prog, "matmul", "Matmul", {"b", "d"}, {"e"}, true, "int8");
+  SetOp(&prog, "dropout", "Dropout", {"e"}, {"f"}, true, "float32");
 
   return prog;
 }
@@ -518,8 +519,8 @@ ProgramDesc BuildProgramDescMatmulNotQuantized() {
   }
   SetOp(&prog, "dropout", "Dropout", {"a"}, {"b"}, false);
   SetOp(&prog, "dequantize", "Dequantize", {"c"}, {"d"}, true);
-  SetOp(&prog, "matmul", "Matmul", {"b", "d"}, {"e"}, true, true);
-  SetOp(&prog, "dropout", "Dropout", {"e"}, {"f"}, true, false);
+  SetOp(&prog, "matmul", "Matmul", {"b", "d"}, {"e"}, true, "int8");
+  SetOp(&prog, "dropout", "Dropout", {"e"}, {"f"}, true, "float32");
 
   return prog;
 }
@@ -590,8 +591,8 @@ ProgramDesc BuildProgramDescElementwiseAdd() {
   SetOp(&prog, "dequantize", "Dequantize1", {"a"}, {"b"}, true);
   SetOp(&prog, "dequantize", "Dequantize2", {"c"}, {"d"}, true);
   SetOp(&prog, "elementwise_add", "ElementwiseAdd", {"b", "d"}, {"e"}, true,
-        true);
-  SetOp(&prog, "dropout", "Dropout", {"e"}, {"f"}, true, false);
+        "int8");
+  SetOp(&prog, "dropout", "Dropout", {"e"}, {"f"}, true, "float32");
 
   return prog;
 }
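In the tester above, SetOp's trailing parameter changes from `bool use_quantizer = false` to a defaulted string. Because the default stays in place, call sites that never opt into quantization (the dropout and dequantize ops) do not have to change. A trimmed-down sketch of that signature choice, with hypothetical names rather than the tester's real helper:

```cpp
// Sketch of the defaulted-parameter design: quantized ops opt in with a
// string, everyone else keeps the old argument list. Hypothetical function,
// for illustration only.
#include <iostream>
#include <string>

void SetOpSketch(const std::string& type, bool use_mkldnn,
                 const std::string& mkldnn_data_type = "float32") {
  std::cout << type << ": use_mkldnn=" << use_mkldnn
            << " mkldnn_data_type=" << mkldnn_data_type << "\n";
}

int main() {
  SetOpSketch("dropout", true);         // defaults to "float32", unchanged call
  SetOpSketch("conv2d", true, "int8");  // explicitly opts in to int8
  return 0;
}
```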
"d"}, {"e"}, true, true); - SetOp(&prog, "dropout", "Dropout", {"e"}, {"f"}, true, false); + SetOp(&prog, "matmul", "Matmul", {"b", "d"}, {"e"}, true, "int8"); + SetOp(&prog, "dropout", "Dropout", {"e"}, {"f"}, true, "float32"); return prog; } @@ -590,8 +591,8 @@ ProgramDesc BuildProgramDescElementwiseAdd() { SetOp(&prog, "dequantize", "Dequantize1", {"a"}, {"b"}, true); SetOp(&prog, "dequantize", "Dequantize2", {"c"}, {"d"}, true); SetOp(&prog, "elementwise_add", "ElementwiseAdd", {"b", "d"}, {"e"}, true, - true); - SetOp(&prog, "dropout", "Dropout", {"e"}, {"f"}, true, false); + "int8"); + SetOp(&prog, "dropout", "Dropout", {"e"}, {"f"}, true, "float32"); return prog; } diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.cc index d570d885c6ebe7a6ff2c7a9047959a2e773541a5..6be4ce566e01e9bcb89a38cbdc2bbd11551a065e 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.cc @@ -32,11 +32,19 @@ void CPUQuantizePlacementPass::ApplyImpl(ir::Graph* graph) const { n->id()) != excluded_ids_list.end()) continue; auto* op = n->Op(); - if (op->HasAttr("use_quantizer") || op->HasProtoAttr("use_quantizer")) { + if (op->HasAttr("mkldnn_data_type") || + op->HasProtoAttr("mkldnn_data_type")) { + // use_quantizer is no longer used + // assign value for compatibility + if (op->GetAttrIfExists("use_quantizer")) { + op->SetAttr("mkldnn_data_type", std::string("int8")); + } if (op_types_list.empty()) { + op->SetAttr("mkldnn_data_type", std::string("int8")); op->SetAttr("use_quantizer", true); } else if (std::find(op_types_list.begin(), op_types_list.end(), op->Type()) != op_types_list.end()) { + op->SetAttr("mkldnn_data_type", std::string("int8")); op->SetAttr("use_quantizer", true); } } diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass_tester.cc index 027d3e2861b506003c4ce1cbe8bd7ee02e0c9c2d..95e321e5b7190499f98c9df3dbef217310abcfcd 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass_tester.cc @@ -15,7 +15,7 @@ #include "paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.h" #include -#include +#include "paddle/fluid/platform/mkldnn_helper.h" namespace paddle { namespace framework { @@ -24,13 +24,11 @@ namespace ir { void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name, const std::vector& inputs, const std::vector& outputs, - boost::tribool use_quantizer) { + const std::string& mkldnn_data_type = "float32") { auto* op = prog->MutableBlock(0)->AppendOp(); op->SetType(type); - - if (!boost::indeterminate(use_quantizer)) - op->SetAttr("use_quantizer", use_quantizer); + op->SetAttr("mkldnn_data_type", mkldnn_data_type); if (type == "conv2d") { op->SetAttr("name", name); @@ -50,7 +48,7 @@ void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name, op->SetOutput("Out", {outputs[0]}); } -// operator use_quantizer +// operator mkldnn_data_type // --------------------------------------- // (a,b)->concat->c none // (c,weights,bias)->conv->f false @@ -71,19 +69,19 @@ ProgramDesc BuildProgramDesc() { } } - SetOp(&prog, "concat", "concat1", {"a", "b"}, {"c"}, boost::indeterminate); - SetOp(&prog, "conv2d", "conv1", {"c", "weights", "bias"}, {"f"}, false); - SetOp(&prog, "relu", "relu1", {"f"}, {"g"}, 
diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass_tester.cc
index 027d3e2861b506003c4ce1cbe8bd7ee02e0c9c2d..95e321e5b7190499f98c9df3dbef217310abcfcd 100644
--- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass_tester.cc
+++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass_tester.cc
@@ -15,7 +15,7 @@
 #include "paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.h"
 
 #include <gtest/gtest.h>
-#include <boost/logic/tribool.hpp>
+#include "paddle/fluid/platform/mkldnn_helper.h"
 
 namespace paddle {
 namespace framework {
@@ -24,13 +24,11 @@ namespace ir {
 
 void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
            const std::vector<std::string>& inputs,
            const std::vector<std::string>& outputs,
-           boost::tribool use_quantizer) {
+           const std::string& mkldnn_data_type = "float32") {
   auto* op = prog->MutableBlock(0)->AppendOp();
 
   op->SetType(type);
-
-  if (!boost::indeterminate(use_quantizer))
-    op->SetAttr("use_quantizer", use_quantizer);
+  op->SetAttr("mkldnn_data_type", mkldnn_data_type);
 
   if (type == "conv2d") {
     op->SetAttr("name", name);
@@ -50,7 +48,7 @@ void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
     op->SetOutput("Out", {outputs[0]});
 }
 
-// operator                      use_quantizer
+// operator                      mkldnn_data_type
 // ---------------------------------------
 // (a,b)->concat->c              none
 // (c,weights,bias)->conv->f     false
@@ -71,19 +69,19 @@ ProgramDesc BuildProgramDesc() {
     }
   }
 
-  SetOp(&prog, "concat", "concat1", {"a", "b"}, {"c"}, boost::indeterminate);
-  SetOp(&prog, "conv2d", "conv1", {"c", "weights", "bias"}, {"f"}, false);
-  SetOp(&prog, "relu", "relu1", {"f"}, {"g"}, boost::indeterminate);
-  SetOp(&prog, "pool2d", "pool1", {"g"}, {"h"}, false);
-  SetOp(&prog, "conv2d", "conv2", {"h", "weights2", "bias2"}, {"k"}, false);
-  SetOp(&prog, "pool2d", "pool2", {"k"}, {"l"}, false);
+  SetOp(&prog, "concat", "concat1", {"a", "b"}, {"c"}, "float32");
+  SetOp(&prog, "conv2d", "conv1", {"c", "weights", "bias"}, {"f"}, "float32");
+  SetOp(&prog, "relu", "relu1", {"f"}, {"g"}, "float32");
+  SetOp(&prog, "pool2d", "pool1", {"g"}, {"h"}, "float32");
+  SetOp(&prog, "conv2d", "conv2", {"h", "weights2", "bias2"}, {"k"}, "float32");
+  SetOp(&prog, "pool2d", "pool2", {"k"}, {"l"}, "float32");
 
   return prog;
 }
 
 void MainTest(std::initializer_list<std::string> quantize_enabled_op_types,
               std::initializer_list<int> quantize_excluded_op_ids,
-              unsigned expected_use_quantizer_true_count) {
+              unsigned expected_int8_data_type_count) {
   auto prog = BuildProgramDesc();
   std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
 
@@ -96,38 +94,34 @@ void MainTest(std::initializer_list<std::string> quantize_enabled_op_types,
 
   graph.reset(pass->Apply(graph.release()));
 
-  unsigned use_quantizer_true_count = 0;
+  unsigned int8_data_type_count = 0;
 
   for (auto* node : graph->Nodes()) {
     if (node->IsOp()) {
-      auto* op = node->Op();
-      if (op->HasAttr("use_quantizer") &&
-          BOOST_GET_CONST(bool, op->GetAttr("use_quantizer"))) {
-        ++use_quantizer_true_count;
+      if (platform::HasOpINT8DataType(node->Op())) {
+        ++int8_data_type_count;
      }
     }
   }
 
-  EXPECT_EQ(use_quantizer_true_count, expected_use_quantizer_true_count);
+  EXPECT_EQ(int8_data_type_count, expected_int8_data_type_count);
 }
 
-void DefaultAttrTest(unsigned expected_use_quantizer_true_count) {
+void DefaultAttrTest(unsigned expected_int8_data_type_count) {
   auto prog = BuildProgramDesc();
   std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
 
   auto pass = PassRegistry::Instance().Get("cpu_quantize_placement_pass");
   graph.reset(pass->Apply(graph.release()));
 
-  unsigned use_quantizer_true_count = 0;
+  unsigned int8_data_type_count = 0;
   for (auto* node : graph->Nodes()) {
     if (node->IsOp()) {
-      auto* op = node->Op();
-      if (op->HasAttr("use_quantizer") &&
-          BOOST_GET_CONST(bool, op->GetAttr("use_quantizer"))) {
-        ++use_quantizer_true_count;
+      if (platform::HasOpINT8DataType(node->Op())) {
+        ++int8_data_type_count;
       }
     }
   }
-  EXPECT_EQ(use_quantizer_true_count, expected_use_quantizer_true_count);
+  EXPECT_EQ(int8_data_type_count, expected_int8_data_type_count);
 }
 
 TEST(QuantizerPlacementPass, enabled_pool) { MainTest({"pool2d"}, {}, 2); }
@@ -137,13 +131,13 @@ TEST(QuantizerPlacementPass, enabled_conv_excluded_one) {
 }
 
 TEST(QuantizerPlacementPass, excluded_none) {
-  // 2 conv + 2 pool
-  MainTest({}, {}, 4);
+  // all operators quantized
+  MainTest({}, {}, 6);
 }
 
 TEST(QuantizerPlacementPass, default_attr_value) {
-  // 2 conv + 2 pool
-  DefaultAttrTest(4);
+  // all operators quantized
+  DefaultAttrTest(6);
 }
 
 }  // namespace ir
diff --git a/paddle/fluid/inference/api/mkldnn_quantizer.cc b/paddle/fluid/inference/api/mkldnn_quantizer.cc
index 0dddbf158722181664f3c982cde8b77145eece3c..9be12ff309acff681da75f7f13e317a408a9552a 100644
--- a/paddle/fluid/inference/api/mkldnn_quantizer.cc
+++ b/paddle/fluid/inference/api/mkldnn_quantizer.cc
@@ -27,6 +27,7 @@
 #include "paddle/fluid/framework/type_defs.h"
 #include "paddle/fluid/inference/analysis/analyzer.h"
 #include "paddle/fluid/inference/api/analysis_predictor.h"
+#include "paddle/fluid/platform/mkldnn_helper.h"
 #include "paddle/fluid/platform/place.h"
 #include "paddle/fluid/string/pretty_log.h"
 
@@ -50,8 +51,7 @@ bool AnalysisPredictor::MkldnnQuantizer::CalculateScales() {
   using VariableNameMap = std::map<std::string, std::vector<std::string>>;
   std::map<std::string, std::map<std::string, LoDTensor>> gathered_data;
   for (const auto* op : predictor_.inference_program_->Block(0).AllOps()) {
-    if (op->HasAttr("use_quantizer") &&
-        BOOST_GET_CONST(bool, op->GetAttr("use_quantizer"))) {
+    if (platform::HasOpINT8DataType(op)) {
       const VariableNameMap& connections_in = op->Inputs();
       const VariableNameMap& connections_out = op->Outputs();
diff --git a/paddle/fluid/operators/concat_op.cc b/paddle/fluid/operators/concat_op.cc
index 060f5412f28e3704e64d33d9a3081a2ca934e918..4f337c03599a548ac3d95ddd06c726be30d7c13f 100644
--- a/paddle/fluid/operators/concat_op.cc
+++ b/paddle/fluid/operators/concat_op.cc
@@ -122,12 +122,16 @@ class ConcatOpMaker : public framework::OpProtoAndCheckerMaker {
              "It has higher priority than Attr(axis). "
              "The shape of AxisTensor must be [1].")
         .AsDispensable();
-    AddAttr<bool>("use_quantizer",
-                  "(bool, default false) "
-                  "Set to true for operators that should be quantized and use "
-                  "int8 kernel. "
-                  "Only used on CPU.")
+    AddAttr<bool>(
+        "use_quantizer",
+        "(bool, default false) "
+        "This parameter is no longer used. Use 'mkldnn_data_type' instead.")
         .SetDefault(false);
+    AddAttr<std::string>(
+        "mkldnn_data_type",
+        "(string, default \"float32\"). Data type of mkldnn kernel")
+        .SetDefault("float32")
+        .InEnum({"float32", "int8", "bfloat16"});
     AddComment(R"DOC(
 Concat Operator.
diff --git a/paddle/fluid/operators/conv_op.cc b/paddle/fluid/operators/conv_op.cc
index 57a5db88c53a393e80204ef94acc9fbc3472e2f7..a8c4107add1beeb9a7a5aedad9be982b6d8b6aac 100644
--- a/paddle/fluid/operators/conv_op.cc
+++ b/paddle/fluid/operators/conv_op.cc
@@ -279,12 +279,16 @@ void Conv2DOpMaker::Make() {
   AddAttr<bool>("use_mkldnn",
                 "(bool, default false) Only used in mkldnn kernel")
       .SetDefault(false);
-  AddAttr<bool>("use_quantizer",
-                "(bool, default false) "
-                "Set to true for operators that should be quantized and use "
-                "int8 kernel. "
-                "Only used on CPU.")
+  AddAttr<bool>(
+      "use_quantizer",
+      "(bool, default false) "
+      "This parameter is no longer used. Use 'mkldnn_data_type' instead.")
      .SetDefault(false);
+  AddAttr<std::string>(
+      "mkldnn_data_type",
+      "(string, default \"float32\"). Data type of mkldnn kernel")
+      .SetDefault("float32")
+      .InEnum({"float32", "int8", "bfloat16"});
   AddAttr<bool>("fuse_relu", "(bool, default false) Only used in mkldnn kernel")
       .SetDefault(false);
   AddAttr<bool>("fuse_brelu",
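Each op maker (concat and conv2d above, and the remaining makers below) registers the same pair of attributes: the use_quantizer bool is kept but documented as deprecated, and a new mkldnn_data_type string is added with .InEnum({"float32", "int8", "bfloat16"}). A sketch of what such an enum constraint enforces at attribute-checking time; this is an illustrative checker, not the framework's real AttrChecker:

```cpp
// Illustrative enum-constraint checker: any value outside the enumerated
// set is rejected when the attribute is set. Hypothetical class, sketching
// the behavior of .InEnum({"float32", "int8", "bfloat16"}).
#include <cassert>
#include <set>
#include <stdexcept>
#include <string>

class StringAttrChecker {
 public:
  explicit StringAttrChecker(std::set<std::string> allowed)
      : allowed_(std::move(allowed)) {}

  void Check(const std::string& value) const {
    if (allowed_.find(value) == allowed_.end()) {
      throw std::invalid_argument(
          "mkldnn_data_type must be one of float32/int8/bfloat16, got: " +
          value);
    }
  }

 private:
  std::set<std::string> allowed_;
};

int main() {
  StringAttrChecker checker({"float32", "int8", "bfloat16"});
  checker.Check("int8");      // accepted
  checker.Check("bfloat16");  // accepted
  try {
    checker.Check("fp16");    // not in the enum -> rejected
    assert(false);
  } catch (const std::invalid_argument&) {
  }
  return 0;
}
```

Listing "bfloat16" in the enum also reserves the attribute for a future bf16 path, even though this patch only wires up int8.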
Data type of mkldnn kernel") + .SetDefault("float32") + .InEnum({"float32", "int8", "bfloat16"}); AddComment(R"DOC( Prior box operator Generate prior boxes for SSD(Single Shot MultiBox Detector) algorithm. diff --git a/paddle/fluid/operators/elementwise/elementwise_op.h b/paddle/fluid/operators/elementwise/elementwise_op.h index f32086c94a99d1267939a421b1c73a858f877556..de202ecf88cacbb5877f8aa226409b65e819d3c6 100644 --- a/paddle/fluid/operators/elementwise/elementwise_op.h +++ b/paddle/fluid/operators/elementwise/elementwise_op.h @@ -140,12 +140,17 @@ class ElementwiseOpMaker : public framework::OpProtoAndCheckerMaker { .SetDefault(""); AddAttr("y_data_format", "This parameter is no longer used.") .SetDefault(""); - /* int8 parameters */ - AddAttr("use_quantizer", - "(bool, default false) " - "Set to true for operators that should be quantized and use " - "int8 kernel. Only used on CPU.") + AddAttr( + "use_quantizer", + "(bool, default false) " + "This parameter is no longer used. Use 'mkldnn_data_type' instead.") .SetDefault(false); + AddAttr( + "mkldnn_data_type", + "(string, default \"float32\"). Data type of mkldnn kernel") + .SetDefault("float32") + .InEnum({"float32", "int8", "bfloat16"}); + /* int8 parameters */ AddAttr("Scale_x", "(float, default 1.0f), The quantize scale of X tensor") .SetDefault(1.0f); diff --git a/paddle/fluid/operators/fc_op.cc b/paddle/fluid/operators/fc_op.cc index f81ed3096238b9da84655c86828040062c96106d..847b24f4f0b0b529cdb36b5823a789c97e0a9d84 100644 --- a/paddle/fluid/operators/fc_op.cc +++ b/paddle/fluid/operators/fc_op.cc @@ -142,13 +142,17 @@ class FCOpMaker : public framework::OpProtoAndCheckerMaker { AddAttr(framework::kAllKernelsMustComputeRuntimeShape, "Skip calling InferShape() function in the runtime.") .SetDefault(true); - /* int8 parameters */ - AddAttr("use_quantizer", - "(bool, default false) " - "Set to true for operators that should be quantized and use " - "int8 kernel. " - "Only used on CPU.") + AddAttr( + "use_quantizer", + "(bool, default false) " + "This parameter is no longer used. Use 'mkldnn_data_type' instead.") .SetDefault(false); + AddAttr( + "mkldnn_data_type", + "(string, default \"float32\"). Data type of mkldnn kernel") + .SetDefault("float32") + .InEnum({"float32", "int8", "bfloat16"}); + /* int8 parameters */ AddAttr("Scale_in", "(float, default 1.0f), The quantize scale of input data") .SetDefault(1.0f); diff --git a/paddle/fluid/operators/matmul_op.cc b/paddle/fluid/operators/matmul_op.cc index 98277df454e09acf82af69d4de5f430ae4f25e57..809164df2056cb4f4856a0b70ea5076351603199 100644 --- a/paddle/fluid/operators/matmul_op.cc +++ b/paddle/fluid/operators/matmul_op.cc @@ -535,13 +535,17 @@ class MatMulOpMaker : public framework::OpProtoAndCheckerMaker { R"DOC(When MKLDNN MatMul_transpose_reshape fuse activated, " "it's a axis atribute of fused transpose for `Out` output.)DOC") .SetDefault({}); - /* int8 parameters */ - AddAttr("use_quantizer", - "(bool, default false) " - "Set to true for operators that should be quantized and use " - "int8 kernel. " - "Only used on CPU.") + AddAttr( + "use_quantizer", + "(bool, default false) " + "This parameter is no longer used. Use 'mkldnn_data_type' instead.") .SetDefault(false); + AddAttr( + "mkldnn_data_type", + "(string, default \"float32\"). 
Data type of mkldnn kernel") + .SetDefault("float32") + .InEnum({"float32", "int8", "bfloat16"}); + /* int8 parameters */ AddAttr("Scale_x", "(float, default 1.0f), The quantize scale of X tensor") .SetDefault(1.0f); diff --git a/paddle/fluid/operators/pool_op.cc b/paddle/fluid/operators/pool_op.cc index 8ff3192ca24e193dfbddebdd1ce79ce91f08dbb2..9900120e6c590f5d0c454fdba3ee4e936c2c409b 100644 --- a/paddle/fluid/operators/pool_op.cc +++ b/paddle/fluid/operators/pool_op.cc @@ -306,12 +306,16 @@ void Pool2dOpMaker::Make() { AddAttr("use_mkldnn", "(bool) Only used in mkldnn kernel. Default False") .SetDefault(false); - AddAttr("use_quantizer", - "(bool) " - "Set to true for operators that should be quantized and use " - "int8 kernel. " - "Only used on CPU. Default False") + AddAttr( + "use_quantizer", + "(bool, default false) " + "This parameter is no longer used. Use 'mkldnn_data_type' instead.") .SetDefault(false); + AddAttr( + "mkldnn_data_type", + "(string, default \"float32\"). Data type of mkldnn kernel") + .SetDefault("float32") + .InEnum({"float32", "int8", "bfloat16"}); AddAttr( "data_format", "(string, default NCHW) Only used in " diff --git a/paddle/fluid/operators/reshape_op.cc b/paddle/fluid/operators/reshape_op.cc index fee0f045825591d548350c289f3f290d5dd1d723..01a33a46521cd81d084f8971c47741b28a105d41 100644 --- a/paddle/fluid/operators/reshape_op.cc +++ b/paddle/fluid/operators/reshape_op.cc @@ -431,13 +431,16 @@ class Reshape2OpMaker : public ReshapeOpMaker { "XShape is just used to store the shape and lod of X, which will " "be used in FlattenGradOp.") .AsIntermediate(); - /* int8 parameters */ - AddAttr("use_quantizer", - "(bool, default false) " - "Set to true for operators that should be quantized and use " - "int8 kernel. " - "Used only on CPU.") + AddAttr( + "use_quantizer", + "(bool, default false) " + "This parameter is no longer used. Use 'mkldnn_data_type' instead.") .SetDefault(false); + AddAttr( + "mkldnn_data_type", + "(string, default \"float32\"). Data type of mkldnn kernel") + .SetDefault("float32") + .InEnum({"float32", "int8", "bfloat16"}); } }; diff --git a/paddle/fluid/operators/transpose_op.cc b/paddle/fluid/operators/transpose_op.cc index 6849bd739501cc73e7142e6462a3f627f445bd22..946fa6305d737363dab3cea2e2b581f2e5659cfd 100644 --- a/paddle/fluid/operators/transpose_op.cc +++ b/paddle/fluid/operators/transpose_op.cc @@ -108,13 +108,17 @@ class TransposeOpMaker : public framework::OpProtoAndCheckerMaker { "Defaults to \"NHWC\". Specify the data format of the output data, " "the input will be transformed automatically. ") .SetDefault("AnyLayout"); - /* int8 parameters */ - AddAttr("use_quantizer", - "(bool, default false) " - "Set to true for operators that should be quantized and use " - "int8 kernel. " - "Only used on CPU.") + AddAttr( + "use_quantizer", + "(bool, default false) " + "This parameter is no longer used. Use 'mkldnn_data_type' instead.") .SetDefault(false); + AddAttr( + "mkldnn_data_type", + "(string, default \"float32\"). Data type of mkldnn kernel") + .SetDefault("float32") + .InEnum({"float32", "int8", "bfloat16"}); + /* int8 parameters */ AddComment(R"DOC( Transpose Operator. 
diff --git a/paddle/fluid/platform/mkldnn_helper.h b/paddle/fluid/platform/mkldnn_helper.h
index 60588d89db803fe6421d16b48ec98e1f02ad522c..c74c47b7d84820f089d4e657f8bddccc5de8d727 100644
--- a/paddle/fluid/platform/mkldnn_helper.h
+++ b/paddle/fluid/platform/mkldnn_helper.h
@@ -422,6 +422,11 @@ inline std::vector<std::vector<int64_t>> ToMkldnnPadding(
   }
 }
 
+inline bool HasOpINT8DataType(const paddle::framework::OpDesc* op) {
+  return (op->GetAttrIfExists<std::string>("mkldnn_data_type") == "int8" ||
+          op->GetAttrIfExists<bool>("use_quantizer"));
+}
+
 enum class RNNReorderType { PP_NTC, PP_TNC, NTC_PP, TNC_PP };
 
 }  // namespace platform
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 1ef15ca4733518949a3d1856c20a0e5f70820554..ae42b3bbdf0ada9ab19dc1ad6cc2f1e09def214d 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -11969,7 +11969,7 @@ for func in [
         ],
         skip_attrs_set={
             "x_data_format", "y_data_format", "axis", "use_quantizer",
-            "Scale_x", "Scale_y", "Scale_out"
+            "mkldnn_data_type", "Scale_x", "Scale_y", "Scale_out"
         }) + """\n""" + str(func.__doc__)
 
 for func in []:
diff --git a/python/paddle/tensor/math.py b/python/paddle/tensor/math.py
index 9b1d7ec3a542c471d2f960b6f0b0b2b7f3509b99..c531eb7f5a49fc5eb054ea75c87591031958830e 100644
--- a/python/paddle/tensor/math.py
+++ b/python/paddle/tensor/math.py
@@ -609,7 +609,7 @@ for func in [
         op_proto,
         additional_args_lines=additional_args_lines,
         skip_attrs_set={"x_data_format", "y_data_format", "axis",
-            "use_quantizer", "Scale_x", "Scale_y", "Scale_out"
+            "use_quantizer", "mkldnn_data_type", "Scale_x", "Scale_y", "Scale_out"
         }) + """\n""" + str(func.__doc__)
 
 def sum(input, dim=None, dtype=None, keep_dim=False, name=None):