diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt index ca6b0229e906c0f8bfbf9ee6781013cb4ef7bbce..57cb3911c2228349e55b6b977e914c07f9565659 100644 --- a/paddle/fluid/framework/ir/CMakeLists.txt +++ b/paddle/fluid/framework/ir/CMakeLists.txt @@ -46,6 +46,7 @@ cc_library(fuse_pass_base SRCS fuse_pass_base.cc DEPS pass) pass_library(graph_to_program_pass base) pass_library(graph_viz_pass base) pass_library(lock_free_optimize_pass base) +pass_library(cpu_quantize_squash_pass inference) pass_library(fc_fuse_pass inference) pass_library(attention_lstm_fuse_pass inference) pass_library(infer_clean_graph_pass inference) @@ -100,6 +101,7 @@ cc_test(test_graph_pattern_detector SRCS graph_pattern_detector_tester.cc DEPS g cc_test(test_fc_fuse_pass SRCS fc_fuse_pass_tester.cc DEPS fc_fuse_pass framework_proto) cc_test(test_seqpool_concat_fuse_pass SRCS seqpool_concat_fuse_pass_tester.cc DEPS seqpool_concat_fuse_pass framework_proto) cc_test(test_is_test_pass SRCS is_test_pass_tester.cc DEPS is_test_pass) +cc_test(test_cpu_quantize_squash_pass SRCS cpu_quantize_squash_pass_tester.cc DEPS cpu_quantize_squash_pass naive_executor) if (WITH_MKLDNN) cc_test(test_depthwise_conv_mkldnn_pass SRCS mkldnn/depthwise_conv_mkldnn_pass_tester.cc DEPS depthwise_conv_mkldnn_pass) cc_test(test_conv_bias_mkldnn_fuse_pass SRCS mkldnn/conv_bias_mkldnn_fuse_pass_tester.cc DEPS conv_bias_mkldnn_fuse_pass naive_executor) diff --git a/paddle/fluid/framework/ir/cpu_quantize_squash_pass.cc b/paddle/fluid/framework/ir/cpu_quantize_squash_pass.cc new file mode 100644 index 0000000000000000000000000000000000000000..de62a69de4f25912c5f56973de0dca5343bbe906 --- /dev/null +++ b/paddle/fluid/framework/ir/cpu_quantize_squash_pass.cc @@ -0,0 +1,146 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file eint8_outcept in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either eint8_outpress or +// implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/framework/ir/cpu_quantize_squash_pass.h" +#include +#include +#include "paddle/fluid/platform/enforce.h" +#include "paddle/fluid/string/pretty_log.h" + +namespace paddle { +namespace framework { +namespace ir { + +using string::PrettyLogDetail; + +void CPUQuantizeSquashPass::FindNodesToKeep( + Graph* graph, + std::unordered_map* nodes_keep_counter) const { + GraphPatternDetector gpd; + patterns::DequantAny deq_any_pattern{gpd.mutable_pattern(), "deqant_any"}; + deq_any_pattern(); + + int found_count = 0; + auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, + Graph* g) { + GET_IR_NODE_FROM_SUBGRAPH(dequant_out, dequant_out, deq_any_pattern); + + if (nodes_keep_counter->find(dequant_out) == nodes_keep_counter->end()) + (*nodes_keep_counter)[dequant_out] = 1; + else + (*nodes_keep_counter)[dequant_out] += 1; + + found_count++; + }; + gpd(graph, handler); + AddStatis(found_count); +} + +void CPUQuantizeSquashPass::Squash( + Graph* graph, + std::unordered_map* nodes_keep_counter) const { + GraphPatternDetector gpd; + patterns::DequantQuantAny squash_pattern{gpd.mutable_pattern(), "squash"}; + squash_pattern(); + + int found_squash_count = 0; + auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, + Graph* g) { + VLOG(4) << "squash requantize-quantize ops pair"; + + GET_IR_NODE_FROM_SUBGRAPH(dequant_in, dequant_in, squash_pattern); + GET_IR_NODE_FROM_SUBGRAPH(dequant_op, dequant_op, squash_pattern); + GET_IR_NODE_FROM_SUBGRAPH(dequant_out, dequant_out, squash_pattern); + GET_IR_NODE_FROM_SUBGRAPH(quant_op, quant_op, squash_pattern); + GET_IR_NODE_FROM_SUBGRAPH(quant_out, quant_out, squash_pattern); + GET_IR_NODE_FROM_SUBGRAPH(next_op, next_op, squash_pattern); + + auto* next_op_desc = next_op->Op(); + float dequant_scale = boost::get(dequant_op->Op()->GetAttr("Scale")); + float quant_scale = boost::get(quant_op->Op()->GetAttr("Scale")); + PADDLE_ENFORCE(nodes_keep_counter->find(dequant_out) != + nodes_keep_counter->end()); + + // check if dequantize op should be kept or removed, decrease the counter + bool keep_dequant = (*nodes_keep_counter)[dequant_out]-- > 1; + + if (dequant_scale == quant_scale) { + // squash dequantize-quantize to nothing + auto quant_out_var_name = quant_out->Name(); + auto next_op_inputs = next_op_desc->InputNames(); + for (const auto& name : next_op_inputs) { + auto var_name = next_op_desc->Input(name)[0]; + if (var_name.compare(quant_out_var_name) == 0) { + next_op_desc->SetInput( + name, std::vector({dequant_in->Name()})); + break; + } + } + + if (keep_dequant) + GraphSafeRemoveNodes(graph, {quant_op, quant_out}); + else + GraphSafeRemoveNodes(graph, + {dequant_op, quant_op, dequant_out, quant_out}); + + IR_NODE_LINK_TO(dequant_in, next_op); + + found_squash_count++; + } else { + // squash dequantize-quantize to requantize op + OpDesc desc; + desc.SetType("requantize"); + desc.SetInput("Input", std::vector({dequant_in->Name()})); + desc.SetOutput("Output", std::vector({quant_out->Name()})); + desc.SetAttr("Scale_in", dequant_scale); + desc.SetAttr("Scale_out", quant_scale); + + auto requant_op = g->CreateOpNode(&desc); + + if (keep_dequant) + GraphSafeRemoveNodes(graph, {quant_op}); + else + GraphSafeRemoveNodes(graph, {dequant_op, quant_op, dequant_out}); + + IR_NODE_LINK_TO(dequant_in, requant_op); + IR_NODE_LINK_TO(requant_op, quant_out); + + found_squash_count++; + } + }; + gpd(graph, handler); + AddStatis(found_squash_count); + PrettyLogDetail("--- squashed %d dequantize-quantize pairs", + found_squash_count); +} + +std::unique_ptr CPUQuantizeSquashPass::ApplyImpl( + std::unique_ptr graph) const { + PADDLE_ENFORCE(graph.get()); + FusePassBase::Init("cpu_quantize_squash_pass", graph.get()); + + std::unordered_map nodes_keep_counter; + FindNodesToKeep(graph.get(), &nodes_keep_counter); + Squash(graph.get(), &nodes_keep_counter); + + return graph; +} + +} // namespace ir +} // namespace framework +} // namespace paddle + +REGISTER_PASS(cpu_quantize_squash_pass, + paddle::framework::ir::CPUQuantizeSquashPass); diff --git a/paddle/fluid/framework/ir/cpu_quantize_squash_pass.h b/paddle/fluid/framework/ir/cpu_quantize_squash_pass.h new file mode 100644 index 0000000000000000000000000000000000000000..b823a2cef35b2f9994df9c9473246db3d69843e7 --- /dev/null +++ b/paddle/fluid/framework/ir/cpu_quantize_squash_pass.h @@ -0,0 +1,58 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include "paddle/fluid/framework/ir/fuse_pass_base.h" +#include "paddle/fluid/framework/ir/graph.h" +#include "paddle/fluid/framework/ir/graph_pattern_detector.h" +#include "paddle/fluid/framework/ir/pass.h" + +namespace paddle { +namespace framework { +namespace ir { + +/* + * Squash dequantize->quantize pair pattern into requantize op + */ +class CPUQuantizeSquashPass : public FusePassBase { + public: + virtual ~CPUQuantizeSquashPass() {} + + protected: + std::unique_ptr ApplyImpl( + std::unique_ptr graph) const override; + + /* + * For each dequantize's output find the number of operators it is an input to + */ + void FindNodesToKeep( + Graph* graph, + std::unordered_map* nodes_keep_counter) const; + + /* + * Squash dequantize-quantize ops pairs into requantize or nothing + */ + void Squash(Graph* graph, + std::unordered_map* nodes_keep_counter) const; + + const std::string name_scope_{"squash"}; +}; + +} // namespace ir +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/ir/cpu_quantize_squash_pass_tester.cc b/paddle/fluid/framework/ir/cpu_quantize_squash_pass_tester.cc new file mode 100644 index 0000000000000000000000000000000000000000..3a3eb53f79955b37f5f9af6a09b2f9c8e934aa3e --- /dev/null +++ b/paddle/fluid/framework/ir/cpu_quantize_squash_pass_tester.cc @@ -0,0 +1,179 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/framework/ir/cpu_quantize_squash_pass.h" +#include +#include "paddle/fluid/framework/naive_executor.h" +#include "paddle/fluid/platform/place.h" + +namespace paddle { +namespace framework { +namespace ir { + +void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name, + const std::vector& inputs, + const std::vector& outputs, bool use_mkldnn, + float scale = 0) { + auto* op = prog->MutableBlock(0)->AppendOp(); + op->SetType(type); + op->SetAttr("use_mkldnn", use_mkldnn); + op->SetAttr("name", name); + if (type == "conv2d") { + op->SetInput("Input", {inputs[0]}); + if (inputs.size() > 1) op->SetInput("Filter", {inputs[1]}); + if (inputs.size() > 2) op->SetInput("Bias", {inputs[2]}); + op->SetOutput("Output", {outputs[0]}); + } else if (type == "quantize") { + op->SetInput("Input", {inputs[0]}); + op->SetOutput("Output", {outputs[0]}); + op->SetAttr("Scale", scale); + } else if (type == "dequantize") { + op->SetInput("Input", {inputs[0]}); + op->SetOutput("Output", {outputs[0]}); + op->SetAttr("Scale", scale); + } +} + +// (a,w1,b1)->Conv1->d +// d->Dequant->e +// e->Quant->f +// (f,w2,b2)->Conv2->i +ProgramDesc BuildProgramDesc(bool use_mkldnn, float scale1, float scale2) { + ProgramDesc prog; + for (auto& v : std::initializer_list( + {"a", "w1", "b1", "d", "e", "f", "w2", "b2", "i"})) { + auto* var = prog.MutableBlock(0)->Var(v); + if (v.find("w") == 0 || v.find("b") == 0) { + var->SetPersistable(true); + } + } + + SetOp(&prog, "conv2d", "Conv1", {"a", "w1", "b1"}, {"d"}, use_mkldnn); + SetOp(&prog, "dequantize", "Dequant", {"d"}, {"e"}, use_mkldnn, scale1); + SetOp(&prog, "quantize", "Quant", {"e"}, {"f"}, use_mkldnn, scale2); + SetOp(&prog, "conv2d", "Conv2", {"f", "w2", "b2"}, {"i"}, use_mkldnn); + return prog; +} + +static const std::initializer_list variable_names{ + "a", "b", "c", "d", "e", "f", "g", "h"}; +// a->Conv1->b +// b->Dequant->c +// +// c->Quant1->d and d->Conv2->e +// +// c->Conv3->f +// +// c->Quant2->g and g->Conv4->h +// +ProgramDesc BuildProgramDesc2(bool use_mkldnn, float scale1, float scale2, + float scale3) { + ProgramDesc prog; + for (auto& v : variable_names) { + prog.MutableBlock(0)->Var(v); + } + + SetOp(&prog, "conv2d", "Conv1", {"a"}, {"b"}, use_mkldnn); + SetOp(&prog, "dequantize", "Dequant", {"b"}, {"c"}, use_mkldnn, scale1); + + SetOp(&prog, "quantize", "Quant1", {"c"}, {"d"}, use_mkldnn, scale2); + SetOp(&prog, "conv2d", "Conv2", {"d"}, {"e"}, use_mkldnn); + + SetOp(&prog, "conv2d", "Conv3", {"c"}, {"f"}, use_mkldnn); + + SetOp(&prog, "quantize", "Quant2", {"c"}, {"g"}, use_mkldnn, scale3); + SetOp(&prog, "conv2d", "Conv4", {"g"}, {"h"}, use_mkldnn); + + return prog; +} + +void InitTensorHolder(Scope* scope, const paddle::platform::Place& place, + const char* var_name) { + auto x = scope->Var(var_name); + auto tensor = x->GetMutable(); + tensor->mutable_data(place, proto::VarType::FP32, + ::paddle::memory::Allocator::kDefault, 1); +} + +void MainTest(const ProgramDesc& prog, int removed_nodes_num) { + std::unique_ptr graph(new ir::Graph(prog)); + + // Init scope, as it is used in pass + auto place = paddle::platform::CPUPlace(); + NaiveExecutor exe{place}; + Scope scope; + exe.CreateVariables(prog, 0, true, &scope); + + for (auto& v : variable_names) { + InitTensorHolder(&scope, place, v.c_str()); + } + + graph->Set(kParamScopeAttr, new framework::Scope*(&scope)); + + auto pass = PassRegistry::Instance().Get("cpu_quantize_squash_pass"); + + int original_nodes_num = graph->Nodes().size(); + + graph = pass->Apply(std::move(graph)); + + int current_nodes_num = graph->Nodes().size(); + + EXPECT_EQ(original_nodes_num - removed_nodes_num, current_nodes_num); +} + +TEST(CpuQuantizeSquashPass, equal_scales) { + auto scale = 1.2345f; + auto use_mkldnn = true; + // Remove 4 nodes: Dequant, Quant, e, f + auto remove_nodes = 4; + MainTest(BuildProgramDesc(use_mkldnn, scale, scale), remove_nodes); + + use_mkldnn = !use_mkldnn; + MainTest(BuildProgramDesc(use_mkldnn, scale, scale), remove_nodes); +} + +TEST(CpuQuantizeSquashPass, inequal_scales) { + auto scale1 = 1.2345f; + auto scale2 = 21.0f; + auto use_mkldnn = true; + // Remove 3 nodes: Dequant, Quant, e + // Insert 1 node: requantize + auto remove_nodes = 2; + MainTest(BuildProgramDesc(use_mkldnn, scale1, scale2), remove_nodes); + + use_mkldnn = !use_mkldnn; + MainTest(BuildProgramDesc(use_mkldnn, scale1, scale2), remove_nodes); +} + +TEST(CpuQuantizeSquashPass, branch_to_equal_inequal_and_fp32) { + // Delete both quantize ops, + // bypass dequantize in both branches, + // insert requantize on one branch + auto scale = 1.2345f; + auto scale2 = 21.0f; + auto use_mkldnn = true; + // Remove 3 nodes: Quant1, Quant2, g + // Insert 1 node: requantize + auto remove_nodes = 2; + MainTest(BuildProgramDesc2(use_mkldnn, scale, scale, scale2), remove_nodes); + + use_mkldnn = !use_mkldnn; + MainTest(BuildProgramDesc2(use_mkldnn, scale, scale, scale2), remove_nodes); +} + +} // namespace ir +} // namespace framework +} // namespace paddle + +USE_PASS(cpu_quantize_squash_pass); diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.cc b/paddle/fluid/framework/ir/graph_pattern_detector.cc index c0c34d186b00814fe6c6fd42beb78133233a1357..08354b526a04626152e8e4df9354d34222347cae 100644 --- a/paddle/fluid/framework/ir/graph_pattern_detector.cc +++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc @@ -1301,6 +1301,51 @@ PDNode *patterns::ConvAffineChannel::operator()( return ac_out_var; } +PDNode *patterns::DequantQuantAny::operator()() { + auto *dequant_in = pattern->NewNode(dequant_in_repr()) + ->AsInput() + ->assert_is_op_input("dequantize", "Input"); + + auto *dequant_op = + pattern->NewNode(dequant_op_repr())->assert_is_op("dequantize"); + + auto *dequant_out = pattern->NewNode(dequant_out_repr()) + ->AsOutput() + ->assert_is_op_output("dequantize", "Output"); + + auto *quant_op = pattern->NewNode(quant_op_repr()) + ->assert_is_op("quantize") + ->AsIntermediate(); + + auto *quant_out = pattern->NewNode(quant_out_repr()) + ->AsOutput() + ->assert_is_op_output("quantize"); + + auto *next_op = pattern->NewNode(next_op_repr())->assert_is_op(); + + dequant_op->LinksFrom({dequant_in}).LinksTo({dequant_out}); + quant_op->LinksFrom({dequant_out}).LinksTo({quant_out}); + next_op->LinksFrom({quant_out}); + + return quant_out; +} + +PDNode *patterns::DequantAny::operator()() { + auto *dequant_op = + pattern->NewNode(dequant_op_repr())->assert_is_op("dequantize"); + + auto *dequant_out = pattern->NewNode(dequant_out_repr()) + ->AsOutput() + ->assert_is_op_output("dequantize", "Output"); + + auto *next_op = pattern->NewNode(next_op_repr())->assert_is_op(); + + dequant_op->LinksTo({dequant_out}); + next_op->LinksFrom({dequant_out}); + + return dequant_out; +} + // a -> transpose_op(1) -> transpose_out_a -> flatten_op(1) -> flatten_out_a // b -> transpose_op(2) -> transpose_out_b -> flatten_op(2) -> flatten_out_b // ... diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.h b/paddle/fluid/framework/ir/graph_pattern_detector.h index c8be586f546dc604375401b13a801841efbf08d2..3db4bba10d6c722db6bace3ace407c94d5826387 100644 --- a/paddle/fluid/framework/ir/graph_pattern_detector.h +++ b/paddle/fluid/framework/ir/graph_pattern_detector.h @@ -18,8 +18,11 @@ #include #endif +#include #include #include +#include +#include #include #include #include "paddle/fluid/framework/ir/graph.h" @@ -766,6 +769,34 @@ struct ConvAffineChannel : public PatternBase { PATTERN_DECL_NODE(ac_out); // Out }; +// Dequantize + Quantize + anyOP +// This pattern is used for squashing the dequantize-quantize pairs. +struct DequantQuantAny : public PatternBase { + DequantQuantAny(PDPattern* pattern, const std::string& name_scope) + : PatternBase(pattern, name_scope, "dequant_quant_any") {} + PDNode* operator()(); + + PATTERN_DECL_NODE(dequant_in); + PATTERN_DECL_NODE(dequant_op); + PATTERN_DECL_NODE(dequant_out); + PATTERN_DECL_NODE(quant_op); + PATTERN_DECL_NODE(quant_out); + PATTERN_DECL_NODE(next_op); +}; + +// Dequantize + anyOP +// This quantize is used for getting number of ops the Dequantize's +// output is an input to. +struct DequantAny : public PatternBase { + DequantAny(PDPattern* pattern, const std::string& name_scope) + : PatternBase(pattern, name_scope, "dequant_any") {} + PDNode* operator()(); + + PATTERN_DECL_NODE(dequant_op); + PATTERN_DECL_NODE(dequant_out); + PATTERN_DECL_NODE(next_op); +}; + struct TransposeFlattenConcat : public PatternBase { TransposeFlattenConcat(PDPattern* pattern, const std::string& name_scope) : PatternBase(pattern, name_scope, "transpose_flatten_concat") {}