Commit d419b859 authored by joanna.wozna.intel, committed by Tao Luo

Add reshape int8 mkldnn op (#21428)

* Add reshape int8 op

test=develop

* Change test to CPUPlace

test=develop

* Correct tests

test=develop
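
A minimal sketch of how this ends up being used, based on the analyzer test updated below (the model path, warmup data, and batch size are placeholders, not part of this commit):

    AnalysisConfig q_cfg;
    q_cfg.SetModel(model_dir);  // placeholder
    q_cfg.EnableMKLDNN();
    q_cfg.EnableMkldnnQuantizer();
    // only the listed op types are quantized, hence the new "reshape2" entry
    q_cfg.mkldnn_quantizer_config()->SetEnabledOpTypes(
        {"conv2d", "depthwise_conv2d", "prior_box", "transpose2", "reshape2"});
    q_cfg.mkldnn_quantizer_config()->SetWarmupData(warmup_data);
    q_cfg.mkldnn_quantizer_config()->SetWarmupBatchSize(warmup_batch_size);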
Parent c047e713
......@@ -1168,6 +1168,27 @@ PDNode *patterns::Transpose::operator()() {
return transpose_out;
}
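// Matches the chain prev_op -> reshape_in -> reshape2 -> reshape_out -> next_op
// so that a pass can inspect both neighbours of reshape2.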
PDNode *patterns::Reshape::operator()() {
auto prev_op = pattern->NewNode(prev_op_repr())->assert_is_op();
auto reshape_op =
pattern->NewNode(reshape_op_repr())->assert_is_op("reshape2");
auto reshape_in = pattern->NewNode(reshape_in_repr())
->AsInput()
->assert_is_op_input("reshape2", "X");
auto reshape_out = pattern->NewNode(reshape_out_repr())
->AsOutput()
->assert_is_op_output("reshape2", "Out");
auto next_op = pattern->NewNode(next_op_repr())->assert_is_op();
prev_op->LinksTo({reshape_in});
reshape_op->LinksFrom({reshape_in}).LinksTo({reshape_out});
next_op->LinksFrom({reshape_out});
return reshape_out;
}
PDNode *patterns::ConvResidual::operator()(bool with_residual_data) {
auto conv_op = pattern->NewNode(conv_op_repr())->assert_is_op("conv2d");
......
......@@ -749,6 +749,21 @@ struct Transpose : public PatternBase {
PATTERN_DECL_NODE(next_op);
};
// Reshape op
// Forward pass for reshape.
// reshape_out is a result of the operator.
struct Reshape : public PatternBase {
Reshape(PDPattern* pattern, const std::string& name_scope)
: PatternBase(pattern, name_scope, "reshape2") {}
PDNode* operator()();
PATTERN_DECL_NODE(prev_op);
PATTERN_DECL_NODE(reshape_in);
PATTERN_DECL_NODE(reshape_op);
PATTERN_DECL_NODE(reshape_out);
PATTERN_DECL_NODE(next_op);
};
// Concat op
// Forward pass for concat.
// concat_out is a result of the operator.
......
......@@ -181,9 +181,7 @@ void CPUQuantizePass::QuantizeConv(Graph* graph,
auto* conv_op_desc = conv_op->Op();
// skip if should not be quantized
-if (!conv_op_desc->HasAttr("use_quantizer") ||
-    !boost::get<bool>(conv_op_desc->GetAttr("use_quantizer")))
-  return;
+if (!conv_op_desc->GetAttrIfExists<bool>("use_quantizer")) return;
GET_IR_NODE_FROM_SUBGRAPH(conv_filter, conv_filter, conv_pattern);
GET_IR_NODE_FROM_SUBGRAPH(conv_input, conv_input, conv_pattern);
......@@ -317,9 +315,7 @@ void CPUQuantizePass::QuantizePool(Graph* graph) const {
auto* pool_op_desc = pool_op->Op();
// skip if should not be quantized
-if (!pool_op_desc->HasAttr("use_quantizer") ||
-    !boost::get<bool>(pool_op_desc->GetAttr("use_quantizer")))
-  return;
+if (!pool_op_desc->GetAttrIfExists<bool>("use_quantizer")) return;
GET_IR_NODE_FROM_SUBGRAPH(pool_input, pool_input, pool_pattern);
GET_IR_NODE_FROM_SUBGRAPH(pool_output, pool_output, pool_pattern);
......@@ -359,9 +355,7 @@ void CPUQuantizePass::QuantizeConcat(Graph* graph) const {
auto* concat_op_desc = concat_op->Op();
// skip if should not be quantized
-if (!concat_op_desc->HasAttr("use_quantizer") ||
-    !boost::get<bool>(concat_op_desc->GetAttr("use_quantizer")))
-  return;
+if (!concat_op_desc->GetAttrIfExists<bool>("use_quantizer")) return;
GET_IR_NODE_FROM_SUBGRAPH(concat_out, concat_out, concat_pattern);
......@@ -401,9 +395,7 @@ void CPUQuantizePass::QuantizePriorBox(Graph* graph) const {
auto* prior_box_op_desc = prior_box_op->Op();
// skip if should not be quantized
-if (!prior_box_op_desc->HasAttr("use_quantizer") ||
-    !boost::get<bool>(prior_box_op_desc->GetAttr("use_quantizer")))
-  return;
+if (!prior_box_op_desc->GetAttrIfExists<bool>("use_quantizer")) return;
GET_IR_NODE_FROM_SUBGRAPH(prior_box_input, prior_box_input,
prior_box_pattern);
......@@ -446,20 +438,18 @@ void CPUQuantizePass::QuantizeTranspose(Graph* graph) const {
GET_IR_NODE_FROM_SUBGRAPH(prev_op, prev_op, transpose_pattern);
GET_IR_NODE_FROM_SUBGRAPH(next_op, next_op, transpose_pattern);
-// skip if prev op is not quantized
-// in future we should check if next_op is quantized
-// transpose INT8 should be used only between INT8 operators
+// skip if prev op and next op are not quantized
if (!(prev_op->Op()->Type() == "dequantize" ||
-      (prev_op->Op()->GetAttrIfExists<bool>("use_quantizer")))) {
+      (prev_op->Op()->GetAttrIfExists<bool>("use_quantizer"))) &&
+    !(next_op->Op()->Type() == "quantize" ||
+      (next_op->Op()->GetAttrIfExists<bool>("use_quantizer")))) {
return;
}
GET_IR_NODE_FROM_SUBGRAPH(transpose_in, transpose_in, transpose_pattern);
GET_IR_NODE_FROM_SUBGRAPH(transpose_out, transpose_out, transpose_pattern);
// get scales calculated after warmup; they scale variables to MAX=1.0
auto scales = Get<VarQuantScale>("quant_var_scales");
auto input_scale = scales[transpose_in->Name()].second.data<double>()[0];
bool is_input_unsigned = scales[transpose_in->Name()].first;
QuantizeInput(g, transpose_op, transpose_in, "X", input_scale,
......@@ -480,6 +470,58 @@ void CPUQuantizePass::QuantizeTranspose(Graph* graph) const {
quantize_transpose_count);
}
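// QuantizeReshape (below) wraps each quantizable reshape2 with a quantize op
// on "X" and a dequantize op on "Out" -- two new ops plus two new variable
// nodes per match, the "1 Quant + 1 IN + 1 DeQuant + 1 OUT" counted in the
// new tests.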
void CPUQuantizePass::QuantizeReshape(Graph* graph) const {
GraphPatternDetector gpd;
auto pattern = gpd.mutable_pattern();
patterns::Reshape reshape_pattern{pattern, name_scope_};
reshape_pattern();
int quantize_reshape_count = 0;
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) {
VLOG(4) << "Quantize reshape op";
GET_IR_NODE_FROM_SUBGRAPH(reshape_op, reshape_op, reshape_pattern);
auto* reshape_op_desc = reshape_op->Op();
// skip if should not be quantized
if (!reshape_op_desc->GetAttrIfExists<bool>("use_quantizer")) {
return;
}
GET_IR_NODE_FROM_SUBGRAPH(prev_op, prev_op, reshape_pattern);
GET_IR_NODE_FROM_SUBGRAPH(next_op, next_op, reshape_pattern);
// skip if prev op and next op are not quantized
if (!(prev_op->Op()->Type() == "dequantize" ||
(prev_op->Op()->GetAttrIfExists<bool>("use_quantizer"))) &&
!(next_op->Op()->Type() == "quantize" ||
(next_op->Op()->GetAttrIfExists<bool>("use_quantizer")))) {
return;
}
GET_IR_NODE_FROM_SUBGRAPH(reshape_in, reshape_in, reshape_pattern);
GET_IR_NODE_FROM_SUBGRAPH(reshape_out, reshape_out, reshape_pattern);
// get scales calculated after warmup; they scale variables to MAX=1.0
auto scales = Get<VarQuantScale>("quant_var_scales");
auto input_scale = scales[reshape_in->Name()].second.data<double>()[0];
bool is_input_unsigned = scales[reshape_in->Name()].first;
QuantizeInput(g, reshape_op, reshape_in, "X", input_scale,
is_input_unsigned);
auto output_scale = scales[reshape_out->Name()].second.data<double>()[0];
bool is_output_unsigned = scales[reshape_out->Name()].first;
DequantizeOutput(g, reshape_op, reshape_out, "Out", output_scale,
is_output_unsigned);
++quantize_reshape_count;
};
gpd(graph, handler);
AddStatis(quantize_reshape_count);
PrettyLogDetail("--- quantized %d reshape ops", quantize_reshape_count);
}
void CPUQuantizePass::ApplyImpl(ir::Graph* graph) const {
VLOG(3) << "Quantizing the graph.";
PADDLE_ENFORCE(graph);
......@@ -494,6 +536,7 @@ void CPUQuantizePass::ApplyImpl(ir::Graph* graph) const {
QuantizePriorBox(graph);
QuantizeTranspose(graph);
QuantizeFc(graph);
QuantizeReshape(graph);
}
} // namespace ir
......
......@@ -56,6 +56,8 @@ class CPUQuantizePass : public FusePassBase {
void QuantizeTranspose(Graph* graph) const;
void QuantizeReshape(Graph* graph) const;
void QuantizeInput(Graph* g, Node* op, Node* input, std::string input_name,
double scale_to_one, bool is_unsigned,
std::string scale_attr_name = "") const;
......
......@@ -50,7 +50,7 @@ void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
op->SetAttr("Scale_in", 1.0f);
op->SetAttr("Scale_out", 1.0f);
op->SetAttr("Scale_weights", std::vector<float>{1.0f});
} else if (type == "pool2d" || type == "transpose2") {
} else if (type == "pool2d" || type == "transpose2" || type == "reshape2") {
op->SetInput("X", {inputs[0]});
op->SetOutput("Out", {outputs[0]});
op->SetAttr("use_quantizer", use_quantizer);
......@@ -70,9 +70,48 @@ void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
op->SetInput("X", inputs);
op->SetOutput("Out", outputs);
op->SetAttr("use_quantizer", use_quantizer);
} else if (type == "dequantize") {
op->SetInput("Input", {inputs[0]});
op->SetOutput("Output", {outputs[0]});
op->SetAttr("Scale", 1.0f);
}
}
void InitTensorHolder(Scope* scope, const paddle::platform::Place& place,
const char* var_name) {
auto x = scope->Var(var_name);
auto tensor = x->GetMutable<LoDTensor>();
tensor->mutable_data(place, proto::VarType::FP32, 1);
}
void PreparePass(std::unique_ptr<ir::Graph>* graph, const ProgramDesc& prog,
const std::initializer_list<std::string> variable_names,
int* original_nodes_num, int* current_nodes_num) {
auto place = paddle::platform::CPUPlace();
NaiveExecutor exe{place};
Scope scope;
exe.CreateVariables(prog, 0, true, &scope);
auto* scales = new VarQuantScale();
for (auto& v : variable_names) {
InitTensorHolder(&scope, place, v.c_str());
LoDTensor tensor;
tensor.Resize({1});
auto* ptr = tensor.mutable_data<double>(place);
ptr[0] = 2.0;
(*scales)[v] = std::make_pair(false, std::move(tensor));
}
(*graph)->SetNotOwned(kParamScopeAttr, &scope);
std::unique_ptr<Pass> pass =
PassRegistry::Instance().Get("cpu_quantize_pass");
pass->Set("quant_var_scales", scales);
*original_nodes_num = (*graph)->Nodes().size();
(*graph).reset(pass->Apply((*graph).release()));
*current_nodes_num = (*graph)->Nodes().size();
}
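// Note: each variable above receives a warmup scale of 2.0, and the pass
// multiplies it by the int8 maximum (127), so the tests expect quantize and
// dequantize "Scale" attributes of 2.0f * 127.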
namespace {
static const std::initializer_list<std::string> variable_names{
"a", "w1", "c", "d", "w2", "e", "f", "g", "h",
......@@ -113,41 +152,6 @@ ProgramDesc BuildProgramDesc(bool use_mkldnn, bool use_quantizer) {
return prog;
}
-void InitTensorHolder(Scope* scope, const paddle::platform::Place& place,
-                      const char* var_name) {
-  auto x = scope->Var(var_name);
-  auto tensor = x->GetMutable<LoDTensor>();
-  tensor->mutable_data(place, proto::VarType::FP32, 1);
-}
-
-void PreparePass(std::unique_ptr<ir::Graph>* graph, const ProgramDesc& prog,
-                 const std::initializer_list<std::string> variable_names,
-                 int* original_nodes_num, int* current_nodes_num) {
-  auto place = paddle::platform::CPUPlace();
-  NaiveExecutor exe{place};
-  Scope scope;
-  exe.CreateVariables(prog, 0, true, &scope);
-  auto* scales = new VarQuantScale();
-  for (auto& v : variable_names) {
-    InitTensorHolder(&scope, place, v.c_str());
-    LoDTensor tensor;
-    tensor.Resize({1});
-    auto* ptr = tensor.mutable_data<double>(place);
-    ptr[0] = 2.0;
-    (*scales)[v] = std::make_pair(false, std::move(tensor));
-  }
-  (*graph)->SetNotOwned(kParamScopeAttr, &scope);
-  std::unique_ptr<Pass> pass =
-      PassRegistry::Instance().Get("cpu_quantize_pass");
-  pass->Set("quant_var_scales", scales);
-  *original_nodes_num = (*graph)->Nodes().size();
-  (*graph).reset(pass->Apply((*graph).release()));
-  *current_nodes_num = (*graph)->Nodes().size();
-}
void MainTest(const ProgramDesc& prog, int conv_count, int pool_count,
int quant_count, int dequant_count, int added_nodes_count,
float scale) {
......@@ -217,9 +221,6 @@ TEST(CpuQuantizePass, do_not_quantize) {
1.0f);
}
-} // namespace
-
-namespace {
static const std::initializer_list<std::string> variable_names_concat = {
"a1", "b1", "a2", "b2", "c", "d"};
......@@ -283,9 +284,7 @@ TEST(CpuQuantizePass, concat) {
MainTestConcat(BuildProgramDescConcat(), pool_count, concat_count,
quant_count, dequant_count, added_nodes_count);
}
-} // namespace
-namespace {
static const std::initializer_list<std::string> variable_names_transpose = {
"a", "w1", "b", "c", "w2", "d", "e", "f"};
......@@ -365,11 +364,119 @@ TEST(CpuQuantizePass, transpose) {
int quant_count = 4;
int dequant_count = 4;
// 4 Quant + 4 IN + 4 DeQuant + 4 OUT
-int added_nodes_count = 16;
+int added_nodes_count = 4 + 4 + 4 + 4;
MainTestTranspose(BuildProgramDescTranspose(), conv_count, transpose_count,
quant_count, dequant_count, added_nodes_count, 2.0f * 127);
}
static const std::initializer_list<std::string> variable_names_reshape = {
"a", "w1", "b", "c", "d", "e", "f"};
// a->Dequantize->b
// b->Reshape->c
// c->Dropout->d
ProgramDesc BuildProgramDescReshape() {
ProgramDesc prog;
for (auto& v : variable_names_reshape) {
prog.MutableBlock(0)->Var(v);
}
SetOp(&prog, "dequantize", "Dequantize1", {"a"}, {"b"}, true);
SetOp(&prog, "reshape2", "Reshape2", {"b"}, {"c"}, true, true);
SetOp(&prog, "dropout", "Dropout", {"c"}, {"d"}, true, false);
return prog;
}
// a->Transpose->b
// b->Reshape->c
// c->Dropout->d
ProgramDesc BuildProgramDescReshapeBetweenNonQuantizedOp() {
ProgramDesc prog;
for (auto& v : variable_names_reshape) {
prog.MutableBlock(0)->Var(v);
}
SetOp(&prog, "transpose2", "Transpose2", {"a"}, {"b"}, true, false);
SetOp(&prog, "reshape2", "Reshape2", {"b"}, {"c"}, true, true);
SetOp(&prog, "dropout", "Dropout", {"c"}, {"d"}, true, false);
return prog;
}
void MainTestReshape(const ProgramDesc& prog, int transpose_count,
int reshape_count, int quant_count, int dequant_count,
int added_nodes_count, float scale) {
std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
int original_nodes_num, current_nodes_num;
PreparePass(&graph, prog, variable_names_reshape, &original_nodes_num,
&current_nodes_num);
float quant_scale = 1.0f;
float dequant_scale = 1.0f;
int quantize_nodes_count = 0;
int dequantize_nodes_count = 0;
int transpose_nodes_count = 0;
int reshape_nodes_count = 0;
for (auto* node : graph->Nodes()) {
if (node->IsOp()) {
auto* op = node->Op();
if (op->Type() == "transpose2") {
transpose_nodes_count++;
} else if (op->Type() == "reshape2") {
reshape_nodes_count++;
} else if (op->Type() == "quantize") {
quantize_nodes_count++;
quant_scale = boost::get<float>(op->GetAttr("Scale"));
EXPECT_EQ(quant_scale, scale) << "Scale for node '" + op->Type() + "'.";
} else if (op->Type() == "dequantize") {
dequantize_nodes_count++;
auto op_name = op->GetAttrIfExists<std::string>("name");
if (op_name != "Dequantize1") {
dequant_scale = boost::get<float>(op->GetAttr("Scale"));
EXPECT_EQ(dequant_scale, scale)
<< "Scale for node '" + op->Type() + "'.";
}
}
}
}
EXPECT_EQ(transpose_nodes_count, transpose_count);
EXPECT_EQ(reshape_nodes_count, reshape_count);
EXPECT_EQ(quantize_nodes_count, quant_count);
EXPECT_EQ(dequantize_nodes_count, dequant_count);
EXPECT_EQ(original_nodes_num + added_nodes_count, current_nodes_num);
}
TEST(CpuQuantizePass, reshape) {
// a->Dequantize->b
// b->Quant->b'->Reshape2->c'->Dequant->c
// c->Dropout->d
int reshape_count = 1;
int transpose_count = 0;
int quant_count = 1;
int dequant_count = 2;
// 1 Quant + 1 IN + 1 DeQuant + 1 OUT
int added_nodes_count = 4;
MainTestReshape(BuildProgramDescReshape(), transpose_count, reshape_count,
quant_count, dequant_count, added_nodes_count, 2.0f * 127);
}
TEST(CpuQuantizePass, reshapeBetweenNonQuantizedOp) {
// a->Transpose2->b
// b->Reshape2->c
// c->Dropout->d
int reshape_count = 1;
int transpose_count = 1;
int quant_count = 0;
int dequant_count = 0;
// 0 Quant + 0 IN + 0 DeQuant + 0 OUT
int added_nodes_count = 0;
MainTestReshape(BuildProgramDescReshapeBetweenNonQuantizedOp(),
transpose_count, reshape_count, quant_count, dequant_count,
added_nodes_count, 2.0f * 127);
}
} // namespace
} // namespace ir
} // namespace framework
} // namespace paddle
......
......@@ -35,6 +35,8 @@ MkldnnQuantizerConfig::MkldnnQuantizerConfig() {
rules_["prior_box"]["Boxes"] = ScaleAlgo::NONE;
rules_["prior_box"]["Variances"] = ScaleAlgo::NONE;
// Transpose2 does not perform calculation on the data. The scale is
// calculated on the input data and assigned to the Quantize and Dequantize
// scales.
rules_["transpose2"]["X"] = ScaleAlgo::KL;
rules_["transpose2"]["Out"] = ScaleAlgo::NONE;
......@@ -42,6 +44,15 @@ MkldnnQuantizerConfig::MkldnnQuantizerConfig() {
rules_["fc"]["W"] = ScaleAlgo::MAX_CH_T;
rules_["fc"]["Bias"] = ScaleAlgo::NONE;
rules_["fc"]["Out"] = ScaleAlgo::KL;
// Reshape2 does not perform calculation on the data: the values are
// unchanged, only the shape metadata differs. The scale is calculated on
// the input data and assigned to the Quantize and Dequantize scales.
rules_["reshape2"]["X"] = ScaleAlgo::KL;
rules_["reshape2"]["Shape"] = ScaleAlgo::NONE;
rules_["reshape2"]["ShapeTensor"] = ScaleAlgo::NONE;
rules_["reshape2"]["XShape"] = ScaleAlgo::NONE;
rules_["reshape2"]["Out"] = ScaleAlgo::NONE;
}
ScaleAlgo MkldnnQuantizerConfig::scale_algo(
......
......@@ -269,7 +269,7 @@ TEST(Analyzer_int8_mobilenet_ssd, quantization) {
q_cfg.EnableMkldnnQuantizer();
q_cfg.mkldnn_quantizer_config();
std::unordered_set<std::string> quantize_operators(
{"conv2d", "depthwise_conv2d", "prior_box", "transpose2"});
{"conv2d", "depthwise_conv2d", "prior_box", "transpose2", "reshape2"});
q_cfg.mkldnn_quantizer_config()->SetEnabledOpTypes(quantize_operators);
q_cfg.mkldnn_quantizer_config()->SetWarmupData(warmup_data);
q_cfg.mkldnn_quantizer_config()->SetWarmupBatchSize(FLAGS_warmup_batch_size);
......
......@@ -419,6 +419,13 @@ class Reshape2OpMaker : public ReshapeOpMaker {
"XShape is just used to store the shape and lod of X, which will "
"be used in FlattenGradOp.")
.AsIntermediate();
/* int8 parameters */
AddAttr<bool>("use_quantizer",
"(bool, default false) "
"Set to true for operators that should be quantized and use "
"int8 kernel. "
"Used only on CPU.")
.SetDefault(false);
}
};
......@@ -572,8 +579,9 @@ REGISTER_OPERATOR(reshape2_grad_grad, ops::Reshape2DoubleGradOp,
ops::ReshapeDoubleGradInplaceInToOut);
REGISTER_OP_CPU_KERNEL_FUNCTOR(reshape2, float, ops::ReshapeKernel, double,
-                               ops::ReshapeKernel, int, ops::ReshapeKernel,
-                               int64_t, ops::ReshapeKernel);
+                               ops::ReshapeKernel, int8_t, ops::ReshapeKernel,
+                               uint8_t, ops::ReshapeKernel, int,
+                               ops::ReshapeKernel, int64_t, ops::ReshapeKernel);
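// The int8_t/uint8_t instantiations let quantized tensors pass through the
// generic ReshapeKernel unchanged: reshape performs no arithmetic on the
// values, so no dedicated INT8 kernel is needed.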
REGISTER_OP_CPU_KERNEL_FUNCTOR(reshape2_grad, float, ops::ReshapeGradKernel,
double, ops::ReshapeGradKernel, int,
ops::ReshapeGradKernel, int64_t,
......
......@@ -40,7 +40,6 @@ class TestReshapeOp(OpTest):
self.infered_shape = (12, 10)
def test_check_output(self):
self.check_output(no_check_set=['XShape'])
def test_check_grad(self):
......@@ -185,6 +184,47 @@ class TestReshapeOpDimInfer2_attr_OnlyShape(TestReshapeOp_attr_OnlyShape):
self.shape = (2, 0, 3, -1)
# test int8 data type on CPU
class TestReshapeInt8Op(OpTest):
def setUp(self):
self.init_dtype()
self.init_data()
self.use_mkldnn = True
self._cpu_only = True
self.op_type = "reshape2"
input = np.random.randint(0, 127, self.ori_shape).astype(self.dtype)
self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(input)}
self.attrs = {
'shape': self.new_shape,
'use_mkldnn': self.use_mkldnn,
}
self.outputs = {
"Out": self.inputs["X"].reshape(self.infered_shape),
'XShape': np.random.random(self.ori_shape).astype(np.float32)
}
def init_dtype(self):
self.dtype = np.int8
def init_data(self):
self.ori_shape = (2, 2, 6)
self.new_shape = (2, 0, 3, -1)
self.infered_shape = (2, 2, 3, -1)
def test_check_output(self):
self.check_output_with_place(
fluid.core.CPUPlace(), atol=1e-5, no_check_set=['XShape'])
def test_check_grad(self):
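# reshape2_grad is not registered for int8/uint8 (see the reshape2_grad
# kernel registration above), so there is no gradient to check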
pass
# test uint8 data type on CPU
class TestReshapeUint8Op(TestReshapeInt8Op):
def init_dtype(self):
self.dtype = np.uint8
# Test python API
class TestReshapeAPI(unittest.TestCase):
# situation 1: have shape( list, no tensor), no actual shape(Tensor)
......