Commit d419b859 authored by joanna.wozna.intel, committed by Tao Luo

Add reshape int8 mkldnn op (#21428)

* Add reshape int8 op

test=develop

* Change test to CPUPlace

test=develop

* Correct tests

test=develop
Parent c047e713
@@ -1168,6 +1168,27 @@ PDNode *patterns::Transpose::operator()() {
  return transpose_out;
}

PDNode *patterns::Reshape::operator()() {
  auto prev_op = pattern->NewNode(prev_op_repr())->assert_is_op();

  auto reshape_op =
      pattern->NewNode(reshape_op_repr())->assert_is_op("reshape2");

  auto reshape_in = pattern->NewNode(reshape_in_repr())
                        ->AsInput()
                        ->assert_is_op_input("reshape2", "X");
  auto reshape_out = pattern->NewNode(reshape_out_repr())
                         ->AsOutput()
                         ->assert_is_op_output("reshape2", "Out");

  auto next_op = pattern->NewNode(next_op_repr())->assert_is_op();

  prev_op->LinksTo({reshape_in});
  reshape_op->LinksFrom({reshape_in}).LinksTo({reshape_out});
  next_op->LinksFrom({reshape_out});
  return reshape_out;
}

PDNode *patterns::ConvResidual::operator()(bool with_residual_data) {
  auto conv_op = pattern->NewNode(conv_op_repr())->assert_is_op("conv2d");
......
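For orientation: the new pattern matches the five-node chain prev_op -> reshape_in -> reshape2 -> reshape_out -> next_op. Below is a minimal, hypothetical sketch of how a pass consumes such a pattern, mirroring the QuantizeReshape handler added further down (the name-scope string is illustrative):

  GraphPatternDetector gpd;
  patterns::Reshape reshape_pattern{gpd.mutable_pattern(), "my_scope"};
  reshape_pattern();  // wires prev_op -> reshape_in -> reshape2 -> reshape_out -> next_op
  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, Graph* g) {
    // Bind matched IR nodes by the names declared with PATTERN_DECL_NODE.
    GET_IR_NODE_FROM_SUBGRAPH(reshape_op, reshape_op, reshape_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(reshape_in, reshape_in, reshape_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(reshape_out, reshape_out, reshape_pattern);
    // ... inspect or rewrite the matched subgraph here ...
  };
  gpd(graph, handler);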
@@ -749,6 +749,21 @@ struct Transpose : public PatternBase {
  PATTERN_DECL_NODE(next_op);
};

// Reshape op
// Forward pass for reshape.
// reshape_out is a result of the operator.
struct Reshape : public PatternBase {
  Reshape(PDPattern* pattern, const std::string& name_scope)
      : PatternBase(pattern, name_scope, "reshape2") {}

  PDNode* operator()();

  PATTERN_DECL_NODE(prev_op);
  PATTERN_DECL_NODE(reshape_in);
  PATTERN_DECL_NODE(reshape_op);
  PATTERN_DECL_NODE(reshape_out);
  PATTERN_DECL_NODE(next_op);
};

// Concat op
// Forward pass for concat.
// concat_out is a result of the operator.
......
@@ -181,9 +181,7 @@ void CPUQuantizePass::QuantizeConv(Graph* graph,
    auto* conv_op_desc = conv_op->Op();
    // skip if should not be quantized
-   if (!conv_op_desc->HasAttr("use_quantizer") ||
-       !boost::get<bool>(conv_op_desc->GetAttr("use_quantizer")))
-     return;
+   if (!conv_op_desc->GetAttrIfExists<bool>("use_quantizer")) return;
    GET_IR_NODE_FROM_SUBGRAPH(conv_filter, conv_filter, conv_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(conv_input, conv_input, conv_pattern);
@@ -317,9 +315,7 @@ void CPUQuantizePass::QuantizePool(Graph* graph) const {
    auto* pool_op_desc = pool_op->Op();
    // skip if should not be quantized
-   if (!pool_op_desc->HasAttr("use_quantizer") ||
-       !boost::get<bool>(pool_op_desc->GetAttr("use_quantizer")))
-     return;
+   if (!pool_op_desc->GetAttrIfExists<bool>("use_quantizer")) return;
    GET_IR_NODE_FROM_SUBGRAPH(pool_input, pool_input, pool_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(pool_output, pool_output, pool_pattern);
@@ -359,9 +355,7 @@ void CPUQuantizePass::QuantizeConcat(Graph* graph) const {
    auto* concat_op_desc = concat_op->Op();
    // skip if should not be quantized
-   if (!concat_op_desc->HasAttr("use_quantizer") ||
-       !boost::get<bool>(concat_op_desc->GetAttr("use_quantizer")))
-     return;
+   if (!concat_op_desc->GetAttrIfExists<bool>("use_quantizer")) return;
    GET_IR_NODE_FROM_SUBGRAPH(concat_out, concat_out, concat_pattern);
@@ -401,9 +395,7 @@ void CPUQuantizePass::QuantizePriorBox(Graph* graph) const {
    auto* prior_box_op_desc = prior_box_op->Op();
    // skip if should not be quantized
-   if (!prior_box_op_desc->HasAttr("use_quantizer") ||
-       !boost::get<bool>(prior_box_op_desc->GetAttr("use_quantizer")))
-     return;
+   if (!prior_box_op_desc->GetAttrIfExists<bool>("use_quantizer")) return;
    GET_IR_NODE_FROM_SUBGRAPH(prior_box_input, prior_box_input,
                              prior_box_pattern);
@@ -446,20 +438,18 @@ void CPUQuantizePass::QuantizeTranspose(Graph* graph) const {
    GET_IR_NODE_FROM_SUBGRAPH(prev_op, prev_op, transpose_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(next_op, next_op, transpose_pattern);
-   // skip if prev op is not quantized
-   // in future we should check if next_op is quantized
-   // transpose INT8 should be used only between INT8 operators
+   // skip if neither prev op nor next op is quantized
    if (!(prev_op->Op()->Type() == "dequantize" ||
-         (prev_op->Op()->GetAttrIfExists<bool>("use_quantizer")))) {
+         (prev_op->Op()->GetAttrIfExists<bool>("use_quantizer"))) &&
+       !(next_op->Op()->Type() == "quantize" ||
+         (next_op->Op()->GetAttrIfExists<bool>("use_quantizer")))) {
      return;
    }
    GET_IR_NODE_FROM_SUBGRAPH(transpose_in, transpose_in, transpose_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(transpose_out, transpose_out, transpose_pattern);
    // get scales calculated after warmup; they scale variables to MAX=1.0
    auto scales = Get<VarQuantScale>("quant_var_scales");
    auto input_scale = scales[transpose_in->Name()].second.data<double>()[0];
    bool is_input_unsigned = scales[transpose_in->Name()].first;
    QuantizeInput(g, transpose_op, transpose_in, "X", input_scale,
@@ -480,6 +470,58 @@ void CPUQuantizePass::QuantizeTranspose(Graph* graph) const {
                  quantize_transpose_count);
  }
void CPUQuantizePass::QuantizeReshape(Graph* graph) const {
  GraphPatternDetector gpd;
  auto pattern = gpd.mutable_pattern();
  patterns::Reshape reshape_pattern{pattern, name_scope_};
  reshape_pattern();

  int quantize_reshape_count = 0;
  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                     Graph* g) {
    VLOG(4) << "Quantize reshape op";
    GET_IR_NODE_FROM_SUBGRAPH(reshape_op, reshape_op, reshape_pattern);
    auto* reshape_op_desc = reshape_op->Op();

    // skip if should not be quantized
    if (!reshape_op_desc->GetAttrIfExists<bool>("use_quantizer")) {
      return;
    }
    GET_IR_NODE_FROM_SUBGRAPH(prev_op, prev_op, reshape_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(next_op, next_op, reshape_pattern);

    // skip if neither prev op nor next op is quantized
    if (!(prev_op->Op()->Type() == "dequantize" ||
          (prev_op->Op()->GetAttrIfExists<bool>("use_quantizer"))) &&
        !(next_op->Op()->Type() == "quantize" ||
          (next_op->Op()->GetAttrIfExists<bool>("use_quantizer")))) {
      return;
    }

    GET_IR_NODE_FROM_SUBGRAPH(reshape_in, reshape_in, reshape_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(reshape_out, reshape_out, reshape_pattern);

    // get scales calculated after warmup; they scale variables to MAX=1.0
    auto scales = Get<VarQuantScale>("quant_var_scales");

    auto input_scale = scales[reshape_in->Name()].second.data<double>()[0];
    bool is_input_unsigned = scales[reshape_in->Name()].first;
    QuantizeInput(g, reshape_op, reshape_in, "X", input_scale,
                  is_input_unsigned);

    auto output_scale = scales[reshape_out->Name()].second.data<double>()[0];
    bool is_output_unsigned = scales[reshape_out->Name()].first;
    DequantizeOutput(g, reshape_op, reshape_out, "Out", output_scale,
                     is_output_unsigned);

    ++quantize_reshape_count;
  };
  gpd(graph, handler);
  AddStatis(quantize_reshape_count);

  PrettyLogDetail("--- quantized %d reshape ops", quantize_reshape_count);
}
void CPUQuantizePass::ApplyImpl(ir::Graph* graph) const {
  VLOG(3) << "Quantizing the graph.";
  PADDLE_ENFORCE(graph);
@@ -494,6 +536,7 @@ void CPUQuantizePass::ApplyImpl(ir::Graph* graph) const {
  QuantizePriorBox(graph);
  QuantizeTranspose(graph);
  QuantizeFc(graph);
  QuantizeReshape(graph);
}
} // namespace ir
......
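The net effect of the new QuantizeReshape pass on a matched subgraph, sketched informally (four nodes are added per quantized reshape — a quantize op, its output variable, a dequantize op, and its input variable — matching the "1 Quant + 1 IN + 1 DeQuant + 1 OUT" accounting in the tests below):

  before: prev_op -> reshape_in -> reshape2 -> reshape_out -> next_op
  after:  prev_op -> reshape_in -> quantize -> (new INT8 var) -> reshape2
                  -> (new INT8 var) -> dequantize -> reshape_out -> next_op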
@@ -56,6 +56,8 @@ class CPUQuantizePass : public FusePassBase {
  void QuantizeTranspose(Graph* graph) const;

  void QuantizeReshape(Graph* graph) const;

  void QuantizeInput(Graph* g, Node* op, Node* input, std::string input_name,
                     double scale_to_one, bool is_unsigned,
                     std::string scale_attr_name = "") const;
......
@@ -50,7 +50,7 @@ void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
    op->SetAttr("Scale_in", 1.0f);
    op->SetAttr("Scale_out", 1.0f);
    op->SetAttr("Scale_weights", std::vector<float>{1.0f});
- } else if (type == "pool2d" || type == "transpose2") {
+ } else if (type == "pool2d" || type == "transpose2" || type == "reshape2") {
    op->SetInput("X", {inputs[0]});
    op->SetOutput("Out", {outputs[0]});
    op->SetAttr("use_quantizer", use_quantizer);
@@ -70,9 +70,48 @@ void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
    op->SetInput("X", inputs);
    op->SetOutput("Out", outputs);
    op->SetAttr("use_quantizer", use_quantizer);
  } else if (type == "dequantize") {
    op->SetInput("Input", {inputs[0]});
    op->SetOutput("Output", {outputs[0]});
    op->SetAttr("Scale", 1.0f);
  }
}
void InitTensorHolder(Scope* scope, const paddle::platform::Place& place,
                      const char* var_name) {
  auto x = scope->Var(var_name);
  auto tensor = x->GetMutable<LoDTensor>();
  tensor->mutable_data(place, proto::VarType::FP32, 1);
}

void PreparePass(std::unique_ptr<ir::Graph>* graph, const ProgramDesc& prog,
                 const std::initializer_list<std::string> variable_names,
                 int* original_nodes_num, int* current_nodes_num) {
  auto place = paddle::platform::CPUPlace();
  NaiveExecutor exe{place};
  Scope scope;
  exe.CreateVariables(prog, 0, true, &scope);
  auto* scales = new VarQuantScale();
  for (auto& v : variable_names) {
    InitTensorHolder(&scope, place, v.c_str());
    LoDTensor tensor;
    tensor.Resize({1});
    auto* ptr = tensor.mutable_data<double>(place);
    ptr[0] = 2.0;
    (*scales)[v] = std::make_pair(false, std::move(tensor));
  }
  (*graph)->SetNotOwned(kParamScopeAttr, &scope);
  std::unique_ptr<Pass> pass =
      PassRegistry::Instance().Get("cpu_quantize_pass");
  pass->Set("quant_var_scales", scales);

  *original_nodes_num = (*graph)->Nodes().size();
  (*graph).reset(pass->Apply((*graph).release()));
  *current_nodes_num = (*graph)->Nodes().size();
}
namespace {
static const std::initializer_list<std::string> variable_names{
    "a", "w1", "c", "d", "w2", "e", "f", "g", "h",
@@ -113,41 +152,6 @@ ProgramDesc BuildProgramDesc(bool use_mkldnn, bool use_quantizer) {
  return prog;
}
void InitTensorHolder(Scope* scope, const paddle::platform::Place& place,
                      const char* var_name) {
  auto x = scope->Var(var_name);
  auto tensor = x->GetMutable<LoDTensor>();
  tensor->mutable_data(place, proto::VarType::FP32, 1);
}

void PreparePass(std::unique_ptr<ir::Graph>* graph, const ProgramDesc& prog,
                 const std::initializer_list<std::string> variable_names,
                 int* original_nodes_num, int* current_nodes_num) {
  auto place = paddle::platform::CPUPlace();
  NaiveExecutor exe{place};
  Scope scope;
  exe.CreateVariables(prog, 0, true, &scope);
  auto* scales = new VarQuantScale();
  for (auto& v : variable_names) {
    InitTensorHolder(&scope, place, v.c_str());
    LoDTensor tensor;
    tensor.Resize({1});
    auto* ptr = tensor.mutable_data<double>(place);
    ptr[0] = 2.0;
    (*scales)[v] = std::make_pair(false, std::move(tensor));
  }
  (*graph)->SetNotOwned(kParamScopeAttr, &scope);
  std::unique_ptr<Pass> pass =
      PassRegistry::Instance().Get("cpu_quantize_pass");
  pass->Set("quant_var_scales", scales);

  *original_nodes_num = (*graph)->Nodes().size();
  (*graph).reset(pass->Apply((*graph).release()));
  *current_nodes_num = (*graph)->Nodes().size();
}
void MainTest(const ProgramDesc& prog, int conv_count, int pool_count,
              int quant_count, int dequant_count, int added_nodes_count,
              float scale) {
@@ -217,9 +221,6 @@ TEST(CpuQuantizePass, do_not_quantize) {
           1.0f);
}
} // namespace
namespace {
static const std::initializer_list<std::string> variable_names_concat = {
    "a1", "b1", "a2", "b2", "c", "d"};
@@ -283,9 +284,7 @@ TEST(CpuQuantizePass, concat) {
  MainTestConcat(BuildProgramDescConcat(), pool_count, concat_count,
                 quant_count, dequant_count, added_nodes_count);
}
} // namespace
namespace {
static const std::initializer_list<std::string> variable_names_transpose = {
    "a", "w1", "b", "c", "w2", "d", "e", "f"};
@@ -365,11 +364,119 @@ TEST(CpuQuantizePass, transpose) {
  int quant_count = 4;
  int dequant_count = 4;
  // 4 Quant + 4 IN + 4 DeQuant + 4 OUT
- int added_nodes_count = 16;
+ int added_nodes_count = 4 + 4 + 4 + 4;
  MainTestTranspose(BuildProgramDescTranspose(), conv_count, transpose_count,
                    quant_count, dequant_count, added_nodes_count, 2.0f * 127);
}
static const std::initializer_list<std::string> variable_names_reshape = {
    "a", "w1", "b", "c", "d", "e", "f"};

// a->Dequantize->b
// b->Reshape->c
// c->Dropout->d
ProgramDesc BuildProgramDescReshape() {
  ProgramDesc prog;
  for (auto& v : variable_names_reshape) {
    prog.MutableBlock(0)->Var(v);
  }
  SetOp(&prog, "dequantize", "Dequantize1", {"a"}, {"b"}, true);
  SetOp(&prog, "reshape2", "Reshape2", {"b"}, {"c"}, true, true);
  SetOp(&prog, "dropout", "Dropout", {"c"}, {"d"}, true, false);

  return prog;
}

// a->Transpose->b
// b->Reshape->c
// c->Dropout->d
ProgramDesc BuildProgramDescReshapeBetweenNonQuantizedOp() {
  ProgramDesc prog;
  for (auto& v : variable_names_reshape) {
    prog.MutableBlock(0)->Var(v);
  }

  SetOp(&prog, "transpose2", "Transpose2", {"a"}, {"b"}, true, false);
  SetOp(&prog, "reshape2", "Reshape2", {"b"}, {"c"}, true, true);
  SetOp(&prog, "dropout", "Dropout", {"c"}, {"d"}, true, false);

  return prog;
}
void MainTestReshape(const ProgramDesc& prog, int transpose_count,
                     int reshape_count, int quant_count, int dequant_count,
                     int added_nodes_count, float scale) {
  std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
  int original_nodes_num, current_nodes_num;
  PreparePass(&graph, prog, variable_names_reshape, &original_nodes_num,
              &current_nodes_num);

  float quant_scale = 1.0f;
  float dequant_scale = 1.0f;
  int quantize_nodes_count = 0;
  int dequantize_nodes_count = 0;
  int transpose_nodes_count = 0;
  int reshape_nodes_count = 0;
  for (auto* node : graph->Nodes()) {
    if (node->IsOp()) {
      auto* op = node->Op();
      if (op->Type() == "transpose2") {
        transpose_nodes_count++;
      } else if (op->Type() == "reshape2") {
        reshape_nodes_count++;
      } else if (op->Type() == "quantize") {
        quantize_nodes_count++;
        quant_scale = boost::get<float>(op->GetAttr("Scale"));
        EXPECT_EQ(quant_scale, scale) << "Scale for node '" + op->Type() + "'.";
      } else if (op->Type() == "dequantize") {
        dequantize_nodes_count++;
        auto op_name = op->GetAttrIfExists<std::string>("name");
        if (op_name != "Dequantize1") {
          dequant_scale = boost::get<float>(op->GetAttr("Scale"));
          EXPECT_EQ(dequant_scale, scale)
              << "Scale for node '" + op->Type() + "'.";
        }
      }
    }
  }
  EXPECT_EQ(transpose_nodes_count, transpose_count);
  EXPECT_EQ(reshape_nodes_count, reshape_count);
  EXPECT_EQ(quantize_nodes_count, quant_count);
  EXPECT_EQ(dequantize_nodes_count, dequant_count);
  EXPECT_EQ(original_nodes_num + added_nodes_count, current_nodes_num);
}
TEST(CpuQuantizePass, reshape) {
  // a->Dequantize->b
  // b->Quant->b'->Reshape2->c'->Dequant->c
  // c->Dropout->d
  int reshape_count = 1;
  int transpose_count = 0;
  int quant_count = 1;
  int dequant_count = 2;
  // 1 Quant + 1 IN + 1 DeQuant + 1 OUT
  int added_nodes_count = 4;
  MainTestReshape(BuildProgramDescReshape(), transpose_count, reshape_count,
                  quant_count, dequant_count, added_nodes_count, 2.0f * 127);
}

TEST(CpuQuantizePass, reshapeBetweenNonQuantizedOp) {
  // a->Transpose2->b
  // b->Reshape2->c
  // c->Dropout->d
  int reshape_count = 1;
  int transpose_count = 1;
  int quant_count = 0;
  int dequant_count = 0;
  // 0 Quant + 0 IN + 0 DeQuant + 0 OUT
  int added_nodes_count = 0;
  MainTestReshape(BuildProgramDescReshapeBetweenNonQuantizedOp(),
                  transpose_count, reshape_count, quant_count, dequant_count,
                  added_nodes_count, 2.0f * 127);
}
} // namespace
} // namespace ir
} // namespace framework
} // namespace paddle
......
@@ -35,6 +35,8 @@ MkldnnQuantizerConfig::MkldnnQuantizerConfig() {
  rules_["prior_box"]["Boxes"] = ScaleAlgo::NONE;
  rules_["prior_box"]["Variances"] = ScaleAlgo::NONE;

  // Transpose2 does not perform calculation on the data. Scale is calculated
  // on the input data and assigned to the Quantize and Dequantize scales.
  rules_["transpose2"]["X"] = ScaleAlgo::KL;
  rules_["transpose2"]["Out"] = ScaleAlgo::NONE;
@@ -42,6 +44,15 @@ MkldnnQuantizerConfig::MkldnnQuantizerConfig() {
  rules_["fc"]["W"] = ScaleAlgo::MAX_CH_T;
  rules_["fc"]["Bias"] = ScaleAlgo::NONE;
  rules_["fc"]["Out"] = ScaleAlgo::KL;

  // Reshape2 does not perform calculation on the data, so the values are not
  // changed. Scale is calculated on the input data and assigned to the
  // Quantize and Dequantize scales.
  rules_["reshape2"]["X"] = ScaleAlgo::KL;
  rules_["reshape2"]["Shape"] = ScaleAlgo::NONE;
  rules_["reshape2"]["ShapeTensor"] = ScaleAlgo::NONE;
  rules_["reshape2"]["XShape"] = ScaleAlgo::NONE;
  rules_["reshape2"]["Out"] = ScaleAlgo::NONE;
}

ScaleAlgo MkldnnQuantizerConfig::scale_algo(
......
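A hedged usage sketch of the new rules through the scale_algo accessor declared above (it assumes an AnalysisConfig q_cfg with EnableMkldnnQuantizer() already called, as in the analyzer test below):

  auto cfg = q_cfg.mkldnn_quantizer_config();
  ScaleAlgo x_algo = cfg->scale_algo("reshape2", "X");      // ScaleAlgo::KL
  ScaleAlgo out_algo = cfg->scale_algo("reshape2", "Out");  // ScaleAlgo::NONE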
@@ -269,7 +269,7 @@ TEST(Analyzer_int8_mobilenet_ssd, quantization) {
  q_cfg.EnableMkldnnQuantizer();
  q_cfg.mkldnn_quantizer_config();
  std::unordered_set<std::string> quantize_operators(
-     {"conv2d", "depthwise_conv2d", "prior_box", "transpose2"});
+     {"conv2d", "depthwise_conv2d", "prior_box", "transpose2", "reshape2"});
  q_cfg.mkldnn_quantizer_config()->SetEnabledOpTypes(quantize_operators);
  q_cfg.mkldnn_quantizer_config()->SetWarmupData(warmup_data);
  q_cfg.mkldnn_quantizer_config()->SetWarmupBatchSize(FLAGS_warmup_batch_size);
......
@@ -419,6 +419,13 @@ class Reshape2OpMaker : public ReshapeOpMaker {
        "XShape is just used to store the shape and lod of X, which will "
        "be used in FlattenGradOp.")
        .AsIntermediate();
    /* int8 parameters */
    AddAttr<bool>("use_quantizer",
                  "(bool, default false) "
                  "Set to true for operators that should be quantized and use "
                  "int8 kernel. "
                  "Used only on CPU.")
        .SetDefault(false);
  }
};
@@ -572,8 +579,9 @@ REGISTER_OPERATOR(reshape2_grad_grad, ops::Reshape2DoubleGradOp,
                  ops::ReshapeDoubleGradInplaceInToOut);
REGISTER_OP_CPU_KERNEL_FUNCTOR(reshape2, float, ops::ReshapeKernel, double,
-                              ops::ReshapeKernel, int, ops::ReshapeKernel,
-                              int64_t, ops::ReshapeKernel);
+                              ops::ReshapeKernel, int8_t, ops::ReshapeKernel,
+                              uint8_t, ops::ReshapeKernel, int,
+                              ops::ReshapeKernel, int64_t, ops::ReshapeKernel);
REGISTER_OP_CPU_KERNEL_FUNCTOR(reshape2_grad, float, ops::ReshapeGradKernel,
                               double, ops::ReshapeGradKernel, int,
                               ops::ReshapeGradKernel, int64_t,
......
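The registration change works because ops::ReshapeKernel does no arithmetic on the elements; reshape only rewrites shape metadata, so the same kernel can be instantiated for int8_t and uint8_t. A toy, self-contained illustration of that property (not Paddle code):

  #include <cstdint>
  #include <utility>
  #include <vector>

  // Toy tensor: reshape replaces only the shape vector; the int8 buffer
  // is carried over untouched, which is why no new int8 math is needed.
  struct ToyTensor {
    std::vector<int64_t> shape;
    std::vector<int8_t> data;
  };

  ToyTensor Reshape(ToyTensor x, std::vector<int64_t> new_shape) {
    x.shape = std::move(new_shape);  // data bytes are untouched
    return x;
  }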
@@ -40,7 +40,6 @@ class TestReshapeOp(OpTest):
        self.infered_shape = (12, 10)

    def test_check_output(self):
        self.check_output(no_check_set=['XShape'])

    def test_check_grad(self):
@@ -185,6 +184,47 @@ class TestReshapeOpDimInfer2_attr_OnlyShape(TestReshapeOp_attr_OnlyShape):
        self.shape = (2, 0, 3, -1)
# test int8 data type on CPU
class TestReshapeInt8Op(OpTest):
    def setUp(self):
        self.init_dtype()
        self.init_data()
        self.use_mkldnn = True
        self._cpu_only = True
        self.op_type = "reshape2"
        input = np.random.randint(0, 127, self.ori_shape).astype(self.dtype)
        self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(input)}
        self.attrs = {
            'shape': self.new_shape,
            'use_mkldnn': self.use_mkldnn,
        }
        self.outputs = {
            "Out": self.inputs["X"].reshape(self.infered_shape),
            'XShape': np.random.random(self.ori_shape).astype(np.float32)
        }

    def init_dtype(self):
        self.dtype = np.int8

    def init_data(self):
        self.ori_shape = (2, 2, 6)
        self.new_shape = (2, 0, 3, -1)
        self.infered_shape = (2, 2, 3, -1)

    def test_check_output(self):
        self.check_output_with_place(
            fluid.core.CPUPlace(), atol=1e-5, no_check_set=['XShape'])

    def test_check_grad(self):
        pass


# test uint8 data type on CPU
class TestReshapeUint8Op(TestReshapeInt8Op):
    def init_dtype(self):
        self.dtype = np.uint8
# Test python API
class TestReshapeAPI(unittest.TestCase):
    # situation 1: have shape (list, no tensor), no actual shape (Tensor)
......