Unverified commit 36abeff4 authored by Sylwester Fraczek, committed by GitHub

adding elementwiseadd quantization (#25178)

Parent 87a4a7ec
......@@ -37,10 +37,11 @@ void UnlinkNodes(ir::Node* a, ir::Node* b) {
b->inputs.end());
}
void LogCannotQuantizeOp(Node* op) {
void LogCannotQuantizeOp(Node* op, const char* details = nullptr) {
std::stringstream msg_ss;
msg_ss << "Cannot quantize operator " << op->Name()
<< " (type: " << op->Op()->Type() << ", id: " << op->id() << ").";
if (details) msg_ss << " " << details;
PrettyLogDetail(msg_ss.str().c_str());
}
......@@ -51,6 +52,13 @@ void LogScaleIsMissingForVar(Node* var) {
PrettyLogDetail(msg_ss.str().c_str());
}
void LogQuantizationDisabled(Node* op) {
VLOG(4) << "Quantization skipped for operator " << op->Name()
<< " (type: " << op->Op()->Type() << ", id: " << op->id()
<< "). Attribute use_quantizer = false.";
}
} // namespace
enum { U8_MAX = 255, S8_MAX = 127 };
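As context for the `Scale_*` attribute values asserted in the tests further down: the pass stores a per-tensor "scale to one" factor (see `QuantizeInput`'s `scale_to_one` parameter in the header) and multiplies it by the integer range of the chosen type. A minimal, self-contained sketch of that arithmetic, with an illustrative helper name that is not part of the pass:

```cpp
#include <cstdio>

enum { U8_MAX = 255, S8_MAX = 127 };

// Illustrative helper (not part of the pass): the attribute written to the
// op is the stored "scale to one" factor times the integer range.
float FinalScale(double scale_to_one, bool is_unsigned) {
  return static_cast<float>(scale_to_one * (is_unsigned ? U8_MAX : S8_MAX));
}

int main() {
  // The unit tests fill every scale tensor with 2.0 and keep tensors signed,
  // hence the expected attribute value of 2.0f * 127.
  std::printf("%.1f\n", FinalScale(2.0, /*is_unsigned=*/false));  // 254.0
}
```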
......@@ -239,7 +247,10 @@ void CPUQuantizePass::QuantizeConv(Graph* graph,
auto* conv_op_desc = conv_op->Op();
// skip if should not be quantized
if (!conv_op_desc->GetAttrIfExists<bool>("use_quantizer")) return;
if (!conv_op_desc->GetAttrIfExists<bool>("use_quantizer")) {
LogQuantizationDisabled(conv_op);
return;
}
GET_IR_NODE_FROM_SUBGRAPH(conv_filter, conv_filter, conv_pattern);
GET_IR_NODE_FROM_SUBGRAPH(conv_input, conv_input, conv_pattern);
......@@ -333,9 +344,13 @@ void CPUQuantizePass::QuantizeFc(Graph* graph) const {
auto* fc_op_desc = fc->Op();
// skip if should not be quantized
if (fc_op_desc->GetAttrIfExists<bool>("use_quantizer") != true ||
fc_op_desc->GetAttrIfExists<bool>("use_mkldnn") != true)
if (!fc_op_desc->GetAttrIfExists<bool>("use_quantizer")) {
LogQuantizationDisabled(fc);
return;
}
if (!fc_op_desc->GetAttrIfExists<bool>("use_mkldnn")) {
return;
}
GET_IR_NODE_FROM_SUBGRAPH(weights, weights, fc_pattern);
GET_IR_NODE_FROM_SUBGRAPH(input, input, fc_pattern);
......@@ -396,7 +411,10 @@ void CPUQuantizePass::QuantizePool(Graph* graph) const {
auto* pool_op_desc = pool_op->Op();
// skip if should not be quantized
if (!pool_op_desc->GetAttrIfExists<bool>("use_quantizer")) return;
if (!pool_op_desc->GetAttrIfExists<bool>("use_quantizer")) {
LogQuantizationDisabled(pool_op);
return;
}
GET_IR_NODE_FROM_SUBGRAPH(pool_input, pool_input, pool_pattern);
GET_IR_NODE_FROM_SUBGRAPH(pool_output, pool_output, pool_pattern);
......@@ -438,7 +456,10 @@ void CPUQuantizePass::QuantizeConcat(Graph* graph) const {
auto* concat_op_desc = concat_op->Op();
// skip if should not be quantized
if (!concat_op_desc->GetAttrIfExists<bool>("use_quantizer")) return;
if (!concat_op_desc->GetAttrIfExists<bool>("use_quantizer")) {
LogQuantizationDisabled(concat_op);
return;
}
GET_IR_NODE_FROM_SUBGRAPH(concat_out, concat_out, concat_pattern);
......@@ -481,7 +502,10 @@ void CPUQuantizePass::QuantizePriorBox(Graph* graph) const {
auto* prior_box_op_desc = prior_box_op->Op();
// skip if should not be quantized
if (!prior_box_op_desc->GetAttrIfExists<bool>("use_quantizer")) return;
if (!prior_box_op_desc->GetAttrIfExists<bool>("use_quantizer")) {
LogQuantizationDisabled(prior_box_op);
return;
}
GET_IR_NODE_FROM_SUBGRAPH(prior_box_input, prior_box_input,
prior_box_pattern);
......@@ -522,6 +546,7 @@ void CPUQuantizePass::QuantizeTranspose(Graph* graph) const {
// skip if should not be quantized
if (!transpose_op_desc->GetAttrIfExists<bool>("use_quantizer")) {
LogQuantizationDisabled(transpose_op);
return;
}
GET_IR_NODE_FROM_SUBGRAPH(prev_op, prev_op, transpose_pattern);
......@@ -576,6 +601,7 @@ void CPUQuantizePass::QuantizeReshape(Graph* graph) const {
// skip if should not be quantized
if (!reshape_op_desc->GetAttrIfExists<bool>("use_quantizer")) {
LogQuantizationDisabled(reshape_op);
return;
}
GET_IR_NODE_FROM_SUBGRAPH(prev_op, prev_op, reshape_pattern);
......@@ -628,6 +654,7 @@ void CPUQuantizePass::QuantizeMatmul(Graph* graph) const {
// skip if should not be quantized
if (!matmul_op_desc->GetAttrIfExists<bool>("use_quantizer")) {
LogQuantizationDisabled(matmul_op);
return;
}
GET_IR_NODE_FROM_SUBGRAPH(prev_op_x, prev_op_x, matmul_pattern);
......@@ -676,6 +703,80 @@ void CPUQuantizePass::QuantizeMatmul(Graph* graph) const {
PrettyLogDetail("--- quantized %d matmul ops", quantize_matmul_count);
}
void CPUQuantizePass::QuantizeElementwiseAdd(Graph* graph) const {
GraphPatternDetector gpd;
auto pattern = gpd.mutable_pattern();
patterns::ElementwiseAdd elementwise_add_pattern{pattern, name_scope_};
elementwise_add_pattern(
pattern->NewNode(elementwise_add_pattern.elementwise_add_x_repr()),
pattern->NewNode(elementwise_add_pattern.elementwise_add_y_repr()));
int quantize_elementwise_add_count = 0;
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) {
VLOG(4) << "Quantize elementwise_add op";
GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_op, elementwise_add_op,
elementwise_add_pattern);
auto* elementwise_add_op_desc = elementwise_add_op->Op();
// skip if should not be quantized
if (!elementwise_add_op_desc->GetAttrIfExists<bool>("use_quantizer")) {
LogQuantizationDisabled(elementwise_add_op);
return;
}
GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_x, elementwise_add_x,
elementwise_add_pattern);
GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_y, elementwise_add_y,
elementwise_add_pattern);
GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_out, elementwise_add_out,
elementwise_add_pattern);
if (!AreScalesPresentForNodes(elementwise_add_op,
{elementwise_add_x, elementwise_add_y})) {
LogCannotQuantizeOp(elementwise_add_op);
return;
}
bool is_x_unsigned{false}, is_y_unsigned{false};
auto input_x_scale =
GetScaleValueForNode(elementwise_add_x, &is_x_unsigned);
auto input_y_scale =
GetScaleValueForNode(elementwise_add_y, &is_y_unsigned);
// TODO(sfraczek): add support for different signedness
if (is_x_unsigned != is_y_unsigned) {
LogCannotQuantizeOp(elementwise_add_op,
"ElementwiseAdd inputs must have the same signedness.");
return;
}
QuantizeInput(g, elementwise_add_op, elementwise_add_x, "X", input_x_scale,
is_x_unsigned, "Scale_x");
QuantizeInput(g, elementwise_add_op, elementwise_add_y, "Y", input_y_scale,
is_y_unsigned, "Scale_y");
// if quantization scale is missing for output tensor, return fp32 data
if (AreScalesPresentForNodes(elementwise_add_op, {elementwise_add_out})) {
bool is_output_unsigned{false};
auto output_scale =
GetScaleValueForNode(elementwise_add_out, &is_output_unsigned);
DequantizeOutput(g, elementwise_add_op, elementwise_add_out, "Out",
output_scale, is_output_unsigned, "Scale_out");
} else {
elementwise_add_op->Op()->SetAttr("force_fp32_output", true);
}
++quantize_elementwise_add_count;
};
gpd(graph, handler);
AddStatis(quantize_elementwise_add_count);
PrettyLogDetail("--- quantized %d elementwise_add ops",
quantize_elementwise_add_count);
}
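Taken together, the new handler has three outcomes: skip (missing input scales or mixed signedness), quantized inputs with forced fp32 output (missing output scale), or fully quantized. A hedged sketch of that decision flow; the enum and function names below are illustrative only:

```cpp
// Illustrative summary of the elementwise_add handler above; names are
// hypothetical, not part of the pass.
enum class Outcome { kSkip, kInt8InFp32Out, kFullInt8 };

Outcome DecideElementwiseAdd(bool x_scale, bool y_scale, bool x_unsigned,
                             bool y_unsigned, bool out_scale) {
  if (!x_scale || !y_scale) return Outcome::kSkip;      // no input scales
  if (x_unsigned != y_unsigned) return Outcome::kSkip;  // mixed signedness
  // Inputs are quantized either way; without an output scale the op keeps
  // fp32 output via the force_fp32_output attribute.
  return out_scale ? Outcome::kFullInt8 : Outcome::kInt8InFp32Out;
}
```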
void CPUQuantizePass::ApplyImpl(ir::Graph* graph) const {
VLOG(3) << "Quantizing the graph.";
PADDLE_ENFORCE(graph);
......@@ -692,6 +793,7 @@ void CPUQuantizePass::ApplyImpl(ir::Graph* graph) const {
QuantizeFc(graph);
QuantizeReshape(graph);
QuantizeMatmul(graph);
QuantizeElementwiseAdd(graph);
}
} // namespace ir
......
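For reference, a hedged sketch of how this pass is typically driven, modeled on the test's `PreparePass` helper below; the `"quant_var_scales"` attribute name and the `VarQuantScale` type are assumptions to verify against this revision:

```cpp
// Hedged usage sketch (assumptions noted above): fetch the registered pass,
// attach the per-variable (is_unsigned, scale tensor) map, and run it.
auto pass = PassRegistry::Instance().Get("cpu_quantize_pass");
pass->Set("quant_var_scales", new VarQuantScale(scales));
graph.reset(pass->Apply(graph.release()));
```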
......@@ -60,6 +60,8 @@ class CPUQuantizePass : public FusePassBase {
void QuantizeMatmul(Graph* graph) const;
void QuantizeElementwiseAdd(Graph* graph) const;
void QuantizeInput(Graph* g, Node* op, Node* input, std::string input_name,
double scale_to_one, bool is_unsigned,
std::string scale_attr_name = "") const;
......
......@@ -14,6 +14,7 @@
#include "paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h"
#include <gtest/gtest.h>
#include "paddle/fluid/framework/naive_executor.h"
#include "paddle/fluid/imperative/type_defs.h"
#include "paddle/fluid/platform/place.h"
......@@ -82,6 +83,14 @@ void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
op->SetAttr("Scale_x", 1.0f);
op->SetAttr("Scale_y", 1.0f);
op->SetAttr("Scale_out", 1.0f);
} else if (type == "elementwise_add") {
op->SetInput("X", {inputs[0]});
if (inputs.size() > 1) op->SetInput("Y", {inputs[1]});
op->SetOutput("Out", {outputs[0]});
op->SetAttr("use_quantizer", use_quantizer);
op->SetAttr("Scale_x", 1.0f);
op->SetAttr("Scale_y", 1.0f);
op->SetAttr("Scale_out", 1.0f);
}
}
......@@ -95,7 +104,8 @@ void InitTensorHolder(Scope* scope, const paddle::platform::Place& place,
void PreparePass(std::unique_ptr<ir::Graph>* graph, const ProgramDesc& prog,
const std::initializer_list<std::string> variable_names,
int* original_nodes_num, int* current_nodes_num,
std::string var_without_scale = "") {
std::string var_without_scale = "",
std::string var_unsigned = "") {
auto place = paddle::platform::CPUPlace();
NaiveExecutor exe{place};
Scope scope;
......@@ -108,8 +118,7 @@ void PreparePass(std::unique_ptr<ir::Graph>* graph, const ProgramDesc& prog,
tensor.Resize({1});
auto* ptr = tensor.mutable_data<double>(place);
ptr[0] = 2.0;
(*scales)[v] = std::make_pair(false, std::move(tensor));
(*scales)[v] = std::make_pair(v == var_unsigned, std::move(tensor));
}
(*graph)->SetNotOwned(kParamScopeAttr, &scope);
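Note on the `PreparePass` change above: the first element of each scales-map entry is the tensor's is_unsigned flag (it is what `GetScaleValueForNode` reports back), so the new parameter lets a test mark exactly one variable as unsigned to hit the mixed-signedness early return. A tiny sketch of the resulting entries, with the 1-element scale tensor simplified to a plain double:

```cpp
#include <map>
#include <string>
#include <utility>

int main() {
  // Simplified stand-in for the scales map: pair.first is the is_unsigned
  // flag, pair.second stands in for the 1-element scale tensor.
  std::map<std::string, std::pair<bool, double>> scales;
  scales["b"] = {true, 2.0};   // marked unsigned for the mixed-sign test
  scales["d"] = {false, 2.0};  // signed (the default)
  return 0;
}
```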
......@@ -387,7 +396,7 @@ static const std::initializer_list<std::string> variable_names_reshape = {
// c->Dropout->d
ProgramDesc BuildProgramDescReshape() {
ProgramDesc prog;
for (auto& v : variable_names_transpose) {
for (auto& v : variable_names_reshape) {
prog.MutableBlock(0)->Var(v);
}
SetOp(&prog, "dequantize", "Dequantize1", {"a"}, {"b"}, true);
......@@ -402,7 +411,7 @@ ProgramDesc BuildProgramDescReshape() {
// c->Dropout->d
ProgramDesc BuildProgramDescReshapeBetweenNonQuantizedOp() {
ProgramDesc prog;
for (auto& v : variable_names_transpose) {
for (auto& v : variable_names_reshape) {
prog.MutableBlock(0)->Var(v);
}
......@@ -491,7 +500,7 @@ static const std::initializer_list<std::string> variable_names_matmul = {
ProgramDesc BuildProgramDescMatmul() {
ProgramDesc prog;
for (auto& v : variable_names_transpose) {
for (auto& v : variable_names_matmul) {
prog.MutableBlock(0)->Var(v);
}
SetOp(&prog, "dequantize", "Dequantize1", {"a"}, {"b"}, true);
......@@ -504,7 +513,7 @@ ProgramDesc BuildProgramDescMatmul() {
ProgramDesc BuildProgramDescMatmulNotQuantized() {
ProgramDesc prog;
for (auto& v : variable_names_transpose) {
for (auto& v : variable_names_matmul) {
prog.MutableBlock(0)->Var(v);
}
SetOp(&prog, "dropout", "Dropout", {"a"}, {"b"}, false);
......@@ -569,6 +578,97 @@ TEST(CpuQuantizePass, matmul_not_quantized) {
MainTestMatmul(BuildProgramDescMatmulNotQuantized(), matmul_count,
quant_count, dequant_count, added_nodes_count, 1.0f);
}
static const std::initializer_list<std::string> variable_names_elementwise_add =
{"a", "b", "c", "d", "e", "f"};
ProgramDesc BuildProgramDescElementwiseAdd() {
ProgramDesc prog;
for (auto& v : variable_names_elementwise_add) {
prog.MutableBlock(0)->Var(v);
}
SetOp(&prog, "dequantize", "Dequantize1", {"a"}, {"b"}, true);
SetOp(&prog, "dequantize", "Dequantize2", {"c"}, {"d"}, true);
SetOp(&prog, "elementwise_add", "ElementwiseAdd", {"b", "d"}, {"e"}, true,
true);
SetOp(&prog, "dropout", "Dropout", {"e"}, {"f"}, true, false);
return prog;
}
void MainTestElementwiseAdd(const ProgramDesc& prog, int elementwise_add_count,
int quant_count, int dequant_count,
int added_nodes_count, float scale,
bool output_scale_missing = false,
bool unsigned_and_signed_input = false) {
std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
int original_nodes_num, current_nodes_num;
PreparePass(&graph, prog, variable_names_elementwise_add, &original_nodes_num,
&current_nodes_num, output_scale_missing ? "e" : "",
unsigned_and_signed_input ? "b" : "");
int quantize_nodes_count = 0;
int dequantize_nodes_count = 0;
int elementwise_add_nodes_count = 0;
for (auto* node : graph->Nodes()) {
if (node->IsOp()) {
auto* op = node->Op();
if (op->Type() == "elementwise_add") {
elementwise_add_nodes_count++;
if (unsigned_and_signed_input) scale = 1.0f;
auto op_name = BOOST_GET_CONST(std::string, op->GetAttr("name"));
EXPECT_EQ(BOOST_GET_CONST(float, op->GetAttr("Scale_x")), scale)
<< "Scale_x for node '" + op_name + "'.";
EXPECT_EQ(BOOST_GET_CONST(float, op->GetAttr("Scale_y")), scale)
<< "Scale_y for node '" + op_name + "'.";
if (output_scale_missing) scale = 1.0f;
EXPECT_EQ(BOOST_GET_CONST(float, op->GetAttr("Scale_out")), scale)
<< "Scale_out for node '" + op_name + "'.";
} else if (op->Type() == "quantize") {
quantize_nodes_count++;
} else if (op->Type() == "dequantize") {
dequantize_nodes_count++;
}
}
}
EXPECT_EQ(elementwise_add_nodes_count, elementwise_add_count);
EXPECT_EQ(quantize_nodes_count, quant_count);
EXPECT_EQ(dequantize_nodes_count, dequant_count);
EXPECT_EQ(original_nodes_num + added_nodes_count, current_nodes_num);
}
TEST(CpuQuantizePass, elementwise_add) {
int elementwise_add_count = 1;
int quant_count = 2;
int dequant_count = 3;
// 2 Quant + 2 IN + 1 DeQuant + 1 OUT
int added_nodes_count = 6;
MainTestElementwiseAdd(BuildProgramDescElementwiseAdd(),
elementwise_add_count, quant_count, dequant_count,
added_nodes_count, 2.0f * 127);
}
TEST(CpuQuantizePass, elementwise_add_output_scale_missing) {
int elementwise_add_count = 1;
int quant_count = 2;
int dequant_count = 2;
// 2 Quant + 2 IN
int added_nodes_count = 4;
MainTestElementwiseAdd(BuildProgramDescElementwiseAdd(),
elementwise_add_count, quant_count, dequant_count,
added_nodes_count, 2.0f * 127, true);
}
TEST(CpuQuantizePass, elementwise_add_unsigned_and_signed_input) {
int elementwise_add_count = 1;
int quant_count = 0;
int dequant_count = 2;
int added_nodes_count = 0;
MainTestElementwiseAdd(BuildProgramDescElementwiseAdd(),
elementwise_add_count, quant_count, dequant_count,
added_nodes_count, 2.0f * 127, false, true);
}
} // namespace
} // namespace ir
......
......@@ -49,6 +49,10 @@ MkldnnQuantizerConfig::MkldnnQuantizerConfig() {
rules_["matmul"]["Y"] = ScaleAlgo::KL;
rules_["matmul"]["Out"] = ScaleAlgo::KL;
rules_["elementwise_add"]["X"] = ScaleAlgo::KL;
rules_["elementwise_add"]["Y"] = ScaleAlgo::KL;
rules_["elementwise_add"]["Out"] = ScaleAlgo::KL;
// Reshape2 does not perform any calculation on the data, and shapes are
// not changed. The scale is calculated on the input data and assigned to
// the Quantize and Dequantize scales.
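The three new rules select the KL algorithm for every elementwise_add tensor: calibration picks the clipping threshold whose quantized histogram minimizes the Kullback-Leibler divergence from the fp32 activation histogram. A hedged lookup sketch; the `scale_algo` accessor and header path are assumed from MkldnnQuantizerConfig's public API and should be verified against this revision:

```cpp
#include "paddle/fluid/inference/api/paddle_mkldnn_quantizer_config.h"

// Hedged sketch of the per-(op type, tensor name) rule lookup.
void Example() {
  paddle::MkldnnQuantizerConfig cfg;
  auto algo = cfg.scale_algo("elementwise_add", "X");
  (void)algo;  // expected: paddle::ScaleAlgo::KL after this change
}
```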
......