diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc index f4358fb243f20bc9b024ef6b02768773fa995f45..410dfbd68028627c7b6266a2c0dac00af614adaf 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc @@ -39,12 +39,13 @@ void UnlinkNodes(ir::Node* a, ir::Node* b) { b->inputs.end()); } -void LogCannotQuantizeOp(Node* op, const char* details = nullptr) { +void MarkAndLogCannotQuantizeOp(Node* op, const char* details = nullptr) { std::stringstream msg_ss; msg_ss << "Cannot quantize operator " << op->Name() << " (type: " << op->Op()->Type() << ", id: " << op->id() << ")."; if (details) msg_ss << " " << details; - PrettyLogDetail(msg_ss.str().c_str()); + VLOG(2) << msg_ss.str().c_str(); + op->Op()->SetAttr("mkldnn_data_type", std::string("float32")); } void LogScaleIsMissingForVarName(const std::string& name) { @@ -56,12 +57,19 @@ void LogScaleIsMissingForVarNode(Node* node) { } void LogQuantizationDisabled(Node* op) { - std::stringstream msg_ss; - VLOG(4) << "Qantization skipped for operator " << op->Name() + VLOG(2) << "Quantization skipped for operator " << op->Name() << " (type: " << op->Op()->Type() << ", id: " << op->id() << "). 
Attribute mkldnn_data_type != \"int8\"."; } +void LogQuantizedOpsCounter(const std::string& type, const int counter, + const char* details = nullptr) { + std::stringstream msg_ss; + msg_ss << "--- quantized " << counter << " " << type << " ops"; + if (details) msg_ss << " " << details; + PrettyLogDetail(msg_ss.str().c_str()); +} + } // namespace enum { U8_MAX = 255, S8_MAX = 127 }; @@ -307,9 +315,10 @@ void CPUQuantizePass::QuantizeConv(Graph* graph, auto has_output_scale = AreScalesPresentForNodes({conv_output}); if (with_residual_data && !has_output_scale) { - LogCannotQuantizeOp(conv_op, - "Conv op with ResidualData input cannot be quantized " - "without output scale."); + MarkAndLogCannotQuantizeOp( + conv_op, + "Conv op with ResidualData input cannot be quantized " + "without output scale."); return; } @@ -318,7 +327,8 @@ void CPUQuantizePass::QuantizeConv(Graph* graph, conv_pattern); if (!AreScalesPresentForNodes( {conv_input, conv_filter, conv_residual_data})) { - LogCannotQuantizeOp(conv_op, "No scale available for the operator"); + MarkAndLogCannotQuantizeOp(conv_op, + "No scale available for the operator"); return; } @@ -330,7 +340,8 @@ void CPUQuantizePass::QuantizeConv(Graph* graph, residual_scale, is_residual_unsigned, "Scale_in_eltwise"); } else { if (!AreScalesPresentForNodes({conv_input, conv_filter})) { - LogCannotQuantizeOp(conv_op, "No scale available for the operator"); + MarkAndLogCannotQuantizeOp(conv_op, + "No scale available for the operator"); return; } } @@ -377,10 +388,9 @@ void CPUQuantizePass::QuantizeConv(Graph* graph, gpd(graph, handler); AddStatis(quantize_conv_count); - std::stringstream msg_ss; - msg_ss << "--- quantized " << quantize_conv_count << " conv2d ops"; - if (with_residual_data) msg_ss << " with residual connection"; - PrettyLogDetail(msg_ss.str().c_str()); + LogQuantizedOpsCounter( + "conv2d", quantize_conv_count, + ((with_residual_data) ? 
"with residual connection" : "")); } void CPUQuantizePass::QuantizeFc(Graph* graph) const { @@ -405,7 +415,7 @@ void CPUQuantizePass::QuantizeFc(Graph* graph) const { return; } if (!fc->Op()->GetAttrIfExists("use_mkldnn")) { - LogCannotQuantizeOp(fc, "use_mkldnn attribute set to false"); + MarkAndLogCannotQuantizeOp(fc, "use_mkldnn attribute set to false"); return; } @@ -414,7 +424,7 @@ void CPUQuantizePass::QuantizeFc(Graph* graph) const { GET_IR_NODE_FROM_SUBGRAPH(output, output, fc_pattern); if (!AreScalesPresentForNodes({input, weights})) { - LogCannotQuantizeOp(fc, "No scale available for the operator"); + MarkAndLogCannotQuantizeOp(fc, "No scale available for the operator"); return; } @@ -448,10 +458,7 @@ void CPUQuantizePass::QuantizeFc(Graph* graph) const { gpd(graph, handler); AddStatis(quantize_fc_count); - - std::stringstream msg_ss; - msg_ss << "--- quantized " << quantize_fc_count << " fc ops"; - PrettyLogDetail(msg_ss.str().c_str()); + LogQuantizedOpsCounter("fc", quantize_fc_count); } void CPUQuantizePass::QuantizePool(Graph* graph) const { @@ -476,7 +483,8 @@ void CPUQuantizePass::QuantizePool(Graph* graph) const { GET_IR_NODE_FROM_SUBGRAPH(pool_output, pool_output, pool_pattern); if (!AreScalesPresentForNodes({pool_input, pool_output})) { - LogCannotQuantizeOp(pool_op, "No scale available for the operator"); + MarkAndLogCannotQuantizeOp(pool_op, + "No scale available for the operator"); return; } @@ -494,8 +502,7 @@ void CPUQuantizePass::QuantizePool(Graph* graph) const { gpd(graph, handler); AddStatis(quantize_pool_count); - - PrettyLogDetail("--- quantized %d pool2d ops", quantize_pool_count); + LogQuantizedOpsCounter("pool2d", quantize_pool_count); } void CPUQuantizePass::QuantizeConcat(Graph* graph) const { @@ -519,7 +526,8 @@ void CPUQuantizePass::QuantizeConcat(Graph* graph) const { GET_IR_NODE_FROM_SUBGRAPH(concat_out, concat_out, concat_pattern); if (!AreScalesPresentForNodes({concat_out})) { - LogCannotQuantizeOp(concat_op, "No scale 
available for the operator"); + MarkAndLogCannotQuantizeOp(concat_op, + "No scale available for the operator"); return; } @@ -539,8 +547,7 @@ void CPUQuantizePass::QuantizeConcat(Graph* graph) const { gpd(graph, handler); AddStatis(quantize_concat_count); - - PrettyLogDetail("--- quantized %d concat ops", quantize_concat_count); + LogQuantizedOpsCounter("concat", quantize_concat_count); } void CPUQuantizePass::QuantizePriorBox(Graph* graph) const { @@ -565,7 +572,8 @@ void CPUQuantizePass::QuantizePriorBox(Graph* graph) const { prior_box_pattern); if (!AreScalesPresentForNodes({prior_box_input})) { - LogCannotQuantizeOp(prior_box_op, "No scale available for the operator"); + MarkAndLogCannotQuantizeOp(prior_box_op, + "No scale available for the operator"); return; } @@ -580,9 +588,7 @@ void CPUQuantizePass::QuantizePriorBox(Graph* graph) const { gpd(graph, handler); AddStatis(quantize_prior_box_count); - - PrettyLogDetail("--- quantized %d prior_box ops", - quantize_prior_box_count); + LogQuantizedOpsCounter("prior_box", quantize_prior_box_count); } void CPUQuantizePass::QuantizeTranspose(Graph* graph) const { @@ -608,13 +614,14 @@ void CPUQuantizePass::QuantizeTranspose(Graph* graph) const { // skip if prev op and next op is not quantized if (!(IsOpDequantized(prev_op)) && !(IsOpQuantized(transpose_out))) { - LogCannotQuantizeOp(transpose_op, - "No other quantizable operators nearby"); + MarkAndLogCannotQuantizeOp(transpose_op, + "No other quantizable operators nearby"); return; } if (!AreScalesPresentForNodes({transpose_in, transpose_out})) { - LogCannotQuantizeOp(transpose_op, "No scale available for the operator"); + MarkAndLogCannotQuantizeOp(transpose_op, + "No scale available for the operator"); return; } @@ -634,9 +641,7 @@ void CPUQuantizePass::QuantizeTranspose(Graph* graph) const { gpd(graph, handler); AddStatis(quantize_transpose_count); - - PrettyLogDetail("--- quantized %d transpose ops", - quantize_transpose_count); + 
LogQuantizedOpsCounter("transpose2", quantize_transpose_count); } void CPUQuantizePass::QuantizeReshape(Graph* graph) const { @@ -662,12 +667,14 @@ void CPUQuantizePass::QuantizeReshape(Graph* graph) const { // skip if prev op is not quantized if (!(IsOpDequantized(prev_op)) && !(IsOpQuantized(reshape_out))) { - LogCannotQuantizeOp(reshape_op, "No other quantizable operators nearby"); + MarkAndLogCannotQuantizeOp(reshape_op, + "No other quantizable operators nearby"); return; } if (!AreScalesPresentForNodes({reshape_in, reshape_out})) { - LogCannotQuantizeOp(reshape_op, "No scale available for the operator"); + MarkAndLogCannotQuantizeOp(reshape_op, + "No scale available for the operator"); return; } @@ -686,8 +693,7 @@ void CPUQuantizePass::QuantizeReshape(Graph* graph) const { gpd(graph, handler); AddStatis(quantize_reshape_count); - - PrettyLogDetail("--- quantized %d reshape ops", quantize_reshape_count); + LogQuantizedOpsCounter("reshape2", quantize_reshape_count); } void CPUQuantizePass::QuantizeSlice(Graph* graph) const { @@ -713,12 +719,14 @@ void CPUQuantizePass::QuantizeSlice(Graph* graph) const { // skip if prev op and next op is not quantized if (!IsOpDequantized(prev_op) && !IsOpQuantized(slice_out)) { - LogCannotQuantizeOp(slice_op, "No other quantizable operators nearby"); + MarkAndLogCannotQuantizeOp(slice_op, + "No other quantizable operators nearby"); return; } if (!AreScalesPresentForNodes({slice_out})) { - LogCannotQuantizeOp(slice_op, "No scale available for the operator"); + MarkAndLogCannotQuantizeOp(slice_op, + "No scale available for the operator"); return; } @@ -737,8 +745,7 @@ void CPUQuantizePass::QuantizeSlice(Graph* graph) const { gpd(graph, handler); AddStatis(quantize_slice_count); - - PrettyLogDetail("--- quantized %d slice ops", quantize_slice_count); + LogQuantizedOpsCounter("slice", quantize_slice_count); } void CPUQuantizePass::QuantizeMatmul(Graph* graph) const { @@ -763,7 +770,8 @@ void CPUQuantizePass::QuantizeMatmul(Graph* 
graph) const { // skip if prev ops are not quantized if (!IsOpDequantized(prev_op_x) || !IsOpDequantized(prev_op_y)) { - LogCannotQuantizeOp(matmul_op, "No other quantizable operators nearby"); + MarkAndLogCannotQuantizeOp(matmul_op, + "No other quantizable operators nearby"); return; } GET_IR_NODE_FROM_SUBGRAPH(matmul_in_x, matmul_in_x, matmul_pattern); @@ -771,7 +779,8 @@ void CPUQuantizePass::QuantizeMatmul(Graph* graph) const { GET_IR_NODE_FROM_SUBGRAPH(matmul_out, matmul_out, matmul_pattern); if (!AreScalesPresentForNodes({matmul_in_x, matmul_in_y})) { - LogCannotQuantizeOp(matmul_op, "No scale available for the operator"); + MarkAndLogCannotQuantizeOp(matmul_op, + "No scale available for the operator"); return; } @@ -803,8 +812,7 @@ void CPUQuantizePass::QuantizeMatmul(Graph* graph) const { }; gpd(graph, handler); AddStatis(quantize_matmul_count); - - PrettyLogDetail("--- quantized %d matmul ops", quantize_matmul_count); + LogQuantizedOpsCounter("matmul", quantize_matmul_count); } void CPUQuantizePass::QuantizeElementwise( @@ -840,8 +848,8 @@ void CPUQuantizePass::QuantizeElementwise( if (!AreScalesPresentForNodes( {elementwise_x, elementwise_y, elementwise_out})) { - LogCannotQuantizeOp(elementwise_op, - "No scale available for the operator"); + MarkAndLogCannotQuantizeOp(elementwise_op, + "No scale available for the operator"); return; } @@ -851,8 +859,8 @@ void CPUQuantizePass::QuantizeElementwise( // TODO(sfraczek): add support for different signness if (is_x_unsigned != is_y_unsigned) { - LogCannotQuantizeOp(elementwise_op, - "Elementwise inputs must be of the same type."); + MarkAndLogCannotQuantizeOp( + elementwise_op, "Elementwise inputs must be of the same type."); return; } @@ -872,9 +880,7 @@ void CPUQuantizePass::QuantizeElementwise( }; gpd(graph, handler); AddStatis(quantize_elementwise_count); - - PrettyLogDetail("--- quantized %d %s ops", quantize_elementwise_count, - elementwise_type); + LogQuantizedOpsCounter(elementwise_type, 
quantize_elementwise_count); } void CPUQuantizePass::QuantizeFusionGru(Graph* graph) const { @@ -900,7 +906,7 @@ void CPUQuantizePass::QuantizeFusionGru(Graph* graph) const { GET_IR_NODE_FROM_SUBGRAPH(out, out, pattern); if (!AreScalesPresentForNodes({x, weight_x})) { - LogCannotQuantizeOp(op, "No scale available for the operator"); + MarkAndLogCannotQuantizeOp(op, "No scale available for the operator"); return; } @@ -929,8 +935,7 @@ void CPUQuantizePass::QuantizeFusionGru(Graph* graph) const { }; gpd(graph, handler); AddStatis(quantize_count); - - PrettyLogDetail("--- quantized %d fusion_gru ops", quantize_count); + LogQuantizedOpsCounter("fusion_gru", quantize_count); } void CPUQuantizePass::QuantizeMultiGru(Graph* graph) const { @@ -957,7 +962,7 @@ void CPUQuantizePass::QuantizeMultiGru(Graph* graph) const { auto wx_names = gru->Op()->Input("WeightX"); if (!AreScalesPresentForNodes({x}) || !AreScalesPresentForVarNames(wx_names)) { - LogCannotQuantizeOp(gru, "No scale available for the operator"); + MarkAndLogCannotQuantizeOp(gru, "No scale available for the operator"); return; } @@ -1007,8 +1012,7 @@ void CPUQuantizePass::QuantizeMultiGru(Graph* graph) const { }; gpd(graph, handler); AddStatis(quantize_count); - - PrettyLogDetail("--- quantized %d multi_gru ops", quantize_count); + LogQuantizedOpsCounter("multi_gru", quantize_count); } void CPUQuantizePass::QuantizeFusionLSTM(Graph* graph) const { @@ -1036,7 +1040,7 @@ void CPUQuantizePass::QuantizeFusionLSTM(Graph* graph) const { // Starting from here there maybe issues if (!AreScalesPresentForNodes({x, weight_x})) { - LogCannotQuantizeOp(op, "No scale available for the operator"); + MarkAndLogCannotQuantizeOp(op, "No scale available for the operator"); return; } @@ -1065,8 +1069,7 @@ void CPUQuantizePass::QuantizeFusionLSTM(Graph* graph) const { }; gpd(graph, handler); AddStatis(quantize_count); - - PrettyLogDetail("--- quantized %d fusion_lstm ops", quantize_count); + LogQuantizedOpsCounter("fusion_lstm", 
quantize_count); } void CPUQuantizePass::QuantizeNearestInterp(Graph* graph) const { @@ -1095,14 +1098,14 @@ void CPUQuantizePass::QuantizeNearestInterp(Graph* graph) const { // skip if prev op and next op is not quantized if (!(IsOpDequantized(prev_op)) && !(IsOpQuantized(nearest_interp_out))) { - LogCannotQuantizeOp(nearest_interp_op, - "No other quantizable operators nearby"); + MarkAndLogCannotQuantizeOp(nearest_interp_op, + "No other quantizable operators nearby"); return; } if (!AreScalesPresentForNodes({nearest_interp_in, nearest_interp_out})) { - LogCannotQuantizeOp(nearest_interp_op, - "No scale available for the operator"); + MarkAndLogCannotQuantizeOp(nearest_interp_op, + "No scale available for the operator"); return; } @@ -1123,9 +1126,7 @@ void CPUQuantizePass::QuantizeNearestInterp(Graph* graph) const { gpd(graph, handler); AddStatis(quantize_nearest_interp_count); - - PrettyLogDetail("--- quantized %d nearest_interp ops", - quantize_nearest_interp_count); + LogQuantizedOpsCounter("nearest_interp", quantize_nearest_interp_count); } void CPUQuantizePass::ApplyImpl(ir::Graph* graph) const { diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.cc index a94ca438449faedf6deed6a10a8c90247f5ace90..327a21245af0d3e78e16bcbc1fc898bcd1e9b37b 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.cc @@ -434,9 +434,17 @@ void CPUQuantizeSquashPass::MultipleQuantizeSquash(Graph* graph) const { platform::errors::NotFound("Operator after quantize operator(%s) " "should has quantize output as input.", quant_out->Name())); - last_op->Op()->SetInput( - last_op_input_name, - std::vector<std::string>({first_quant_out->Name()})); + + // update the next operator input, + // by replacing quant_out with first_quant_out + auto last_op_names = last_op->Op()->Input(last_op_input_name); + 
last_op_names.erase(std::remove(last_op_names.begin(), + last_op_names.end(), quant_out->Name()), + last_op_names.end()); + last_op_names.push_back(first_quant_out->Name()); + last_op->Op()->SetInput(last_op_input_name, + std::vector<std::string>(last_op_names)); + + IR_NODE_LINK_TO(first_quant_out, last_op); + GraphSafeRemoveNodes(graph, {quant_op, quant_out}); + removed_quantize++;