diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt index 0bbe5e6cc56c07a3777bc7c835cdb39859410880..873868c4ac8afbe428504733b93f8a6498eec806 100755 --- a/paddle/fluid/framework/ir/CMakeLists.txt +++ b/paddle/fluid/framework/ir/CMakeLists.txt @@ -89,7 +89,7 @@ pass_library(conv_elementwise_add2_act_fuse_pass inference) pass_library(conv_elementwise_add_fuse_pass inference) pass_library(transpose_flatten_concat_fuse_pass inference) pass_library(inplace_op_var_pass inference) -pass_library(identity_scale_op_clean_pass base) +pass_library(identity_op_clean_pass base) pass_library(sync_batch_norm_pass base) pass_library(runtime_context_cache_pass base) pass_library(quant_conv2d_dequant_fuse_pass inference) @@ -102,7 +102,6 @@ pass_library(delete_weight_dequant_linear_op_pass inference) pass_library(delete_quant_dequant_linear_op_pass inference) pass_library(delete_dropout_op_pass inference) pass_library(delete_concat_op_pass inference) -pass_library(delete_c_identity_op_pass inference) pass_library(preln_residual_bias_fuse_pass inference) pass_library(constant_folding_pass inference) pass_library(auto_mixed_precision_pass inference) diff --git a/paddle/fluid/framework/ir/delete_c_identity_op_pass.cc b/paddle/fluid/framework/ir/delete_c_identity_op_pass.cc deleted file mode 100644 index 5c8f6745b992843be983311669b47595d44e5b2a..0000000000000000000000000000000000000000 --- a/paddle/fluid/framework/ir/delete_c_identity_op_pass.cc +++ /dev/null @@ -1,129 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#include "paddle/fluid/framework/ir/delete_c_identity_op_pass.h" - -#include - -#include "paddle/fluid/framework/op_version_registry.h" - -namespace paddle { -namespace framework { -namespace ir { - -namespace patterns { - -void DeleteCIdentityOpPattern::operator()() { - auto any_op_out = pattern->NewNode(any_op_out_repr()) - ->assert_is_op_input("c_identity", "X") - ->AsInput(); - auto c_identity_op = - pattern->NewNode(c_identity_op_repr())->assert_is_op("c_identity"); - - auto c_identity_op_out = pattern->NewNode(c_identity_op_out_repr()) - ->assert_is_op_output("c_identity", "Out") - ->AsIntermediate(); - - auto any_op2 = pattern->NewNode(any_op2_repr())->assert_is_op()->AsOutput(); - - c_identity_op->LinksFrom({any_op_out}); - c_identity_op_out->LinksFrom({c_identity_op}); - any_op2->LinksFrom({c_identity_op_out}); -} - -} // namespace patterns - -#define GET_IR_NODE(node__) GET_IR_NODE_FROM_SUBGRAPH(node__, node__, pattern); -#define GET_NODES \ - GET_IR_NODE(any_op_out); \ - GET_IR_NODE(c_identity_op); \ - GET_IR_NODE(c_identity_op_out); \ - GET_IR_NODE(any_op2); - -void DeleteCIdentityOpPass::ApplyImpl(ir::Graph* graph) const { - const std::string pattern_name = "delete_c_identity_op_pattern"; - FusePassBase::Init(pattern_name, graph); - - GraphPatternDetector gpd; - - patterns::DeleteCIdentityOpPattern pattern(gpd.mutable_pattern(), - pattern_name); - pattern(); - - auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, - Graph* g) { - GET_NODES; - IR_NODE_LINK_TO(any_op_out, any_op2); - std::string any_op_out_name = any_op_out->Var()->Name(); - std::string c_identity_op_out_name = c_identity_op_out->Var()->Name(); - - auto* any_op2_desc = any_op2->Op(); - auto var_map = any_op2_desc->Inputs(); - std::string arg_name = ""; - for (auto& name_m : var_map) { - if (std::find(name_m.second.begin(), - name_m.second.end(), - c_identity_op_out_name) != name_m.second.end()) { - arg_name = name_m.first; - } - } - if (arg_name.size() == 0) { - LOG(INFO) << "Delete c_identity op pass: can not find the input " - << c_identity_op_out_name; - return; - } - - // modify the any_op2's inputs - for (auto& name_m : var_map) { - if (std::find(name_m.second.begin(), - name_m.second.end(), - c_identity_op_out_name) != name_m.second.end()) { - std::vector new_inputs; - for (auto& i_n : name_m.second) { - if (i_n != c_identity_op_out_name) { - new_inputs.push_back(i_n); - } - } - new_inputs.push_back(any_op_out_name); - any_op2_desc->SetInput(name_m.first, new_inputs); - any_op2_desc->Flush(); - } - } - any_op2_desc->Flush(); - // Delete the unneeded nodes. - GraphSafeRemoveNodes(graph, {c_identity_op, c_identity_op_out}); - }; - - gpd(graph, handler); -} - -DeleteCIdentityOpPass::DeleteCIdentityOpPass() { - AddOpCompat(OpCompat("c_identity")) - .AddInput("X") - .IsTensor() - .End() - .AddOutput("Out") - .IsTensor() - .End(); -} - -} // namespace ir -} // namespace framework -} // namespace paddle - -REGISTER_PASS(delete_c_identity_op_pass, - paddle::framework::ir::DeleteCIdentityOpPass); -REGISTER_PASS_CAPABILITY(delete_c_identity_op_pass) - .AddCombination( - paddle::framework::compatible::OpVersionComparatorCombination().LE( - "c_identity", 1)); diff --git a/paddle/fluid/framework/ir/delete_c_identity_op_pass.h b/paddle/fluid/framework/ir/delete_c_identity_op_pass.h deleted file mode 100644 index 84131c428d9b33f5fc0de29a9865fc8cf2ebe1f4..0000000000000000000000000000000000000000 --- a/paddle/fluid/framework/ir/delete_c_identity_op_pass.h +++ /dev/null @@ -1,52 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once -#include - -#include "paddle/fluid/framework/ir/fuse_pass_base.h" -#include "paddle/fluid/framework/ir/graph_pattern_detector.h" - -namespace paddle { -namespace framework { -namespace ir { - -namespace patterns { -struct DeleteCIdentityOpPattern : public PatternBase { - DeleteCIdentityOpPattern(PDPattern* pattern, const std::string& name_scope) - : PatternBase(pattern, name_scope, "delete_c_identity_op_pattern") {} - - void operator()(); - - PATTERN_DECL_NODE(any_op_out); - PATTERN_DECL_NODE(c_identity_op); - PATTERN_DECL_NODE(c_identity_op_out); - PATTERN_DECL_NODE(any_op2); -}; -} // namespace patterns - -class Graph; - -class DeleteCIdentityOpPass : public FusePassBase { - public: - DeleteCIdentityOpPass(); - virtual ~DeleteCIdentityOpPass() {} - - protected: - void ApplyImpl(ir::Graph* graph) const override; -}; - -} // namespace ir -} // namespace framework -} // namespace paddle diff --git a/paddle/fluid/framework/ir/identity_scale_op_clean_pass.cc b/paddle/fluid/framework/ir/identity_op_clean_pass.cc similarity index 51% rename from paddle/fluid/framework/ir/identity_scale_op_clean_pass.cc rename to paddle/fluid/framework/ir/identity_op_clean_pass.cc index e6c20ce36d461dcb18f37eec2431048375f5e012..720198576d8a88a8dd1adc21134532b779e24770 100644 --- a/paddle/fluid/framework/ir/identity_scale_op_clean_pass.cc +++ b/paddle/fluid/framework/ir/identity_op_clean_pass.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/ir/identity_scale_op_clean_pass.h" +#include "paddle/fluid/framework/ir/identity_op_clean_pass.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/op_version_registry.h" @@ -23,17 +23,16 @@ namespace ir { class Graph; -void IdentityScaleOpCleanPass::ApplyImpl(ir::Graph* graph) const { +void IdentityOpCleanPass::ApplyImpl(ir::Graph* graph) const { FusePassBase::Init("identity_scale_op_clean", graph); - // pre_op -> scale_in -> scale_op -> scale_out + // pre_op -> useless_op_in -> useless_op -> useless_op_out // -> - // pre_op -> scale_out + // pre_op -> useless_op_out GraphPatternDetector detector; - auto scale_in = + auto useless_op_in = detector.mutable_pattern() - ->NewNode("scale_in") - ->assert_is_op_input("scale") + ->NewNode("useless_op_in") ->assert_has_n_outputs(1) ->assert_var_not_persistable() ->assert_more([](Node* x) { @@ -45,27 +44,51 @@ void IdentityScaleOpCleanPass::ApplyImpl(ir::Graph* graph) const { } return true; }); - auto scale_op = detector.mutable_pattern() - ->NewNode("scale_fuse") - ->assert_is_op("scale") - ->assert_op_attr("scale", 1.) - ->assert_op_attr("bias", 0.); - auto scale_out = detector.mutable_pattern() - ->NewNode("scale_out") - ->assert_is_op_output("scale"); - scale_op->LinksFrom({scale_in}).LinksTo({scale_out}); + // This useless_op must have only one input and one output! + auto useless_op = + detector.mutable_pattern() + ->NewNode("useless_op") + ->assert_has_n_inputs(1) + ->assert_has_n_outputs(1) + ->assert_more([](Node* x) { + if (!x->IsOp()) { + return false; + } + if (x->Op()->Type() == "scale") { + auto scale = x->Op()->GetAttrIfExists("scale"); + auto bias = x->Op()->GetAttrIfExists("bias"); + if (std::abs(bias) <= 1e-6 && std::abs(scale - 1) <= 1e-6) { + return true; + } + } + if (x->Op()->Type() == "cast") { + auto in_dtype = x->Op()->GetAttrIfExists("in_dtype"); + auto out_dtype = x->Op()->GetAttrIfExists("out_dtype"); + if (in_dtype == out_dtype) { + return true; + } + } + if (x->Op()->Type() == "c_identity") { + return true; + } + // you can add more cases here. + return false; + }); + auto useless_op_out = detector.mutable_pattern()->NewNode("useless_op_out"); + + useless_op->LinksFrom({useless_op_in}).LinksTo({useless_op_out}); int found_subgraph_count = 0; GraphPatternDetector::handle_t handler = [&](const GraphPatternDetector::subgraph_t& subgraph, Graph* graph) { - Node* scale_op_var = subgraph.at(scale_op); - Node* scale_in_var = subgraph.at(scale_in); - Node* scale_out_var = subgraph.at(scale_out); - const std::string scale_in_name = scale_in_var->Name(); - const std::string scale_out_name = scale_out_var->Name(); + Node* useless_op_var = subgraph.at(useless_op); + Node* useless_op_in_var = subgraph.at(useless_op_in); + Node* useless_op_out_var = subgraph.at(useless_op_out); + const std::string useless_op_in_name = useless_op_in_var->Name(); + const std::string useless_op_out_name = useless_op_out_var->Name(); // Remove links in graph - GraphSafeRemoveNodes(graph, {scale_in_var, scale_op_var}); + GraphSafeRemoveNodes(graph, {useless_op_in_var, useless_op_var}); // Modify pre_op_desc // Link pre_op directly to scale_out for (auto& node : graph->Nodes()) { @@ -76,16 +99,16 @@ void IdentityScaleOpCleanPass::ApplyImpl(ir::Graph* graph) const { auto names = out_var_map.second; bool reset = false; for (size_t i = 0; i < names.size(); i++) { - if (names[i] == scale_in_name) { + if (names[i] == useless_op_in_name) { reset = true; - names[i] = scale_out_name; + names[i] = useless_op_out_name; break; } } if (reset) { op_desc->SetOutput(out_var_map.first, names); op_desc->Flush(); - IR_NODE_LINK_TO(node, scale_out_var); + IR_NODE_LINK_TO(node, useless_op_out_var); break; } } @@ -102,9 +125,10 @@ void IdentityScaleOpCleanPass::ApplyImpl(ir::Graph* graph) const { } // namespace framework } // namespace paddle -REGISTER_PASS(identity_scale_op_clean_pass, - paddle::framework::ir::IdentityScaleOpCleanPass); -REGISTER_PASS_CAPABILITY(identity_scale_op_clean_pass) +REGISTER_PASS(identity_op_clean_pass, + paddle::framework::ir::IdentityOpCleanPass); +REGISTER_PASS_CAPABILITY(identity_op_clean_pass) .AddCombination( - paddle::framework::compatible::OpVersionComparatorCombination().EQ( - "scale", 0)); + paddle::framework::compatible::OpVersionComparatorCombination() + .EQ("scale", 0) + .LE("c_identity", 1)); diff --git a/paddle/fluid/framework/ir/identity_scale_op_clean_pass.h b/paddle/fluid/framework/ir/identity_op_clean_pass.h similarity index 89% rename from paddle/fluid/framework/ir/identity_scale_op_clean_pass.h rename to paddle/fluid/framework/ir/identity_op_clean_pass.h index 7e3d4e19fa84deccce889d18a7840baeff21cfb3..76003befb65a52b3474b776e964d753899406d5b 100644 --- a/paddle/fluid/framework/ir/identity_scale_op_clean_pass.h +++ b/paddle/fluid/framework/ir/identity_op_clean_pass.h @@ -22,12 +22,12 @@ namespace ir { class Graph; -class IdentityScaleOpCleanPass : public FusePassBase { +class IdentityOpCleanPass : public FusePassBase { protected: void ApplyImpl(ir::Graph* graph) const override; private: - virtual ~IdentityScaleOpCleanPass() = default; + virtual ~IdentityOpCleanPass() = default; }; } // namespace ir diff --git a/paddle/fluid/framework/ir/pass.cc b/paddle/fluid/framework/ir/pass.cc index 9e61210e623ab16f5312b35d425d9b4e5dc63a11..e5ee2dc274930964e3e1a47aff42f9ecf8804655 100644 --- a/paddle/fluid/framework/ir/pass.cc +++ b/paddle/fluid/framework/ir/pass.cc @@ -54,7 +54,7 @@ static const std::vector support_subgraph_passes = { static const std::vector xpu_support_subgraph_passes = { "delete_dropout_op_pass", "delete_concat_op_pass", - "identity_scale_op_clean_pass", + "identity_op_clean_pass", "delete_op_device_pass", "constant_folding_pass", "delete_elementwise_mul_op_pass", diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc index 79c7bc7c1cdcef158fd950eb15b20c6d07c04c7d..e13cf391b7a405e13499c51f217da65bcee2a3f0 100644 --- a/paddle/fluid/inference/api/paddle_pass_builder.cc +++ b/paddle/fluid/inference/api/paddle_pass_builder.cc @@ -94,12 +94,11 @@ const std::vector kTRTSubgraphPasses({ "delete_quant_dequant_filter_op_pass", // "trt_delete_weight_dequant_linear_op_pass", // "delete_quant_dequant_linear_op_pass", // - "identity_scale_op_clean_pass", // + "identity_op_clean_pass", // "add_support_int8_pass", // "simplify_with_basic_ops_pass", // "trt_embedding_eltwise_layernorm_fuse_pass", // "preln_embedding_eltwise_layernorm_fuse_pass", // - "delete_c_identity_op_pass", // "trt_multihead_matmul_fuse_pass_v2", // "trt_multihead_matmul_fuse_pass_v3", // "multihead_matmul_roformer_fuse_pass", // @@ -175,7 +174,7 @@ const std::vector kLiteSubgraphPasses({ // running errors. After fusion operator supports low precision, delete this. const std::vector kGpuLowerPrecisionPasses{ "map_op_to_another_pass", - "identity_scale_op_clean_pass", + "identity_op_clean_pass", "simplify_with_basic_ops_pass", "silu_fuse_pass", "delete_quant_dequant_linear_op_pass", @@ -222,7 +221,7 @@ const std::vector kCINNCompilerPasses{ GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) { passes_.assign({ "map_op_to_another_pass", // - "identity_scale_op_clean_pass", // + "identity_op_clean_pass", // "is_test_pass", // "simplify_with_basic_ops_pass", // "delete_quant_dequant_linear_op_pass", // @@ -511,7 +510,7 @@ XpuPassStrategy::XpuPassStrategy() : PassStrategy({}) { passes_.assign({ "delete_dropout_op_pass", "delete_concat_op_pass", - "identity_scale_op_clean_pass", + "identity_op_clean_pass", "delete_repeated_ops_pass", "delete_op_device_pass", "constant_folding_pass", diff --git a/test/ir/inference/test_delete_c_identity_op_pass.py b/test/ir/inference/test_delete_c_identity_op_pass.py index 9b7027a19697266ff860af8a780347a4854f62ab..e79b2bfa488ee9f0e79c4d5116ae6ed9bada927d 100644 --- a/test/ir/inference/test_delete_c_identity_op_pass.py +++ b/test/ir/inference/test_delete_c_identity_op_pass.py @@ -56,7 +56,7 @@ class TestDeleteCIdentityPass(PassAutoScanTest): self.run_and_statis( max_examples=2, min_success_num=2, - passes=["delete_c_identity_op_pass"], + passes=["identity_op_clean_pass"], ) diff --git a/test/ir/inference/test_identity_scale_clean_pass.py b/test/ir/inference/test_identity_clean_pass.py similarity index 80% rename from test/ir/inference/test_identity_scale_clean_pass.py rename to test/ir/inference/test_identity_clean_pass.py index 5017e1cb30c65dab87ee746ebe4770fcaa2bec25..5a39aad833e810864381cecd59cbb4f379991728 100644 --- a/test/ir/inference/test_identity_scale_clean_pass.py +++ b/test/ir/inference/test_identity_clean_pass.py @@ -18,20 +18,10 @@ import hypothesis.strategies as st from auto_scan_test import PassAutoScanTest from program_config import OpConfig, ProgramConfig, TensorConfig -import paddle.inference as paddle_infer - class TestIdentityScaleCleanPass(PassAutoScanTest): def sample_predictor_configs(self, program_config): - config = self.create_trt_inference_config() - config.enable_tensorrt_engine( - max_batch_size=8, - workspace_size=0, - min_subgraph_size=0, - precision_mode=paddle_infer.PrecisionType.Float32, - use_static=False, - use_calib_mode=False, - ) + config = self.create_inference_config(use_gpu=True) yield config, ['relu'], (1e-5, 1e-5) def sample_program_config(self, draw): @@ -61,9 +51,7 @@ class TestIdentityScaleCleanPass(PassAutoScanTest): return program_config def test(self): - self.run_and_statis( - max_examples=25, passes=["identity_scale_op_clean_pass"] - ) + self.run_and_statis(max_examples=25, passes=["identity_op_clean_pass"]) if __name__ == "__main__": diff --git a/test/ir/inference/test_trt_convert_range.py b/test/ir/inference/test_trt_convert_range.py index 42c00181f24e4ab80bc694f0f8e28e17912fb914..5ef6b8b90524d860b6f36547278606d3cabd89bd 100644 --- a/test/ir/inference/test_trt_convert_range.py +++ b/test/ir/inference/test_trt_convert_range.py @@ -156,7 +156,7 @@ class TrtConvertRangeStaticTest(TrtLayerAutoScanTest): def generate_input2(): return np.array([1]).astype(np.int32) - for in_dtype in [2, 5]: + for in_dtype in [2]: self.in_dtype = in_dtype dics = [{}] ops_config = [ diff --git a/test/ir/inference/test_xpu_delete_repeated_ops_pass.py b/test/ir/inference/test_xpu_delete_repeated_ops_pass.py index b9e789ac8b6116ca9e4950807aaac840217cc53c..d05e529cb31ae11d5a4f6efbbb60275dd696f0f6 100644 --- a/test/ir/inference/test_xpu_delete_repeated_ops_pass.py +++ b/test/ir/inference/test_xpu_delete_repeated_ops_pass.py @@ -102,7 +102,7 @@ class TestDeleteRepeatedShapePass(PassAutoScanTest): class TestDeleteRepeatedSlicePass(PassAutoScanTest): def sample_predictor_configs(self, program_config): config = self.create_inference_config(use_xpu=True) - yield config, ['slice', 'cast', 'cast', 'cast'], (1e-5, 1e-5) + yield config, ['slice'], (1e-5, 1e-5) def sample_program_config(self, draw): slice_x = draw( @@ -122,15 +122,6 @@ class TestDeleteRepeatedSlicePass(PassAutoScanTest): decrease_axis=[0], outputs={"Out": ["slice0_out"]}, ) - cast_op0 = OpConfig( - "cast", - inputs={ - "X": ["slice0_out"], - }, - in_dtype=5, - out_dtype=5, - outputs={"Out": ["cast0_out"]}, - ) slice_op1 = OpConfig( "slice", inputs={ @@ -142,15 +133,6 @@ class TestDeleteRepeatedSlicePass(PassAutoScanTest): decrease_axis=[0], outputs={"Out": ["slice1_out"]}, ) - cast_op1 = OpConfig( - "cast", - inputs={ - "X": ["slice1_out"], - }, - in_dtype=5, - out_dtype=5, - outputs={"Out": ["cast1_out"]}, - ) slice_op2 = OpConfig( "slice", inputs={ @@ -162,16 +144,7 @@ class TestDeleteRepeatedSlicePass(PassAutoScanTest): decrease_axis=[0], outputs={"Out": ["slice2_out"]}, ) - cast_op2 = OpConfig( - "cast", - inputs={ - "X": ["slice2_out"], - }, - in_dtype=5, - out_dtype=5, - outputs={"Out": ["cast2_out"]}, - ) - ops = [slice_op0, cast_op0, slice_op1, cast_op1, slice_op2, cast_op2] + ops = [slice_op0, slice_op1, slice_op2] program_config = ProgramConfig( ops=ops, @@ -179,7 +152,7 @@ class TestDeleteRepeatedSlicePass(PassAutoScanTest): inputs={ "slice_x": TensorConfig(shape=slice_x), }, - outputs=["cast0_out", "cast1_out", "cast2_out"], + outputs=["slice0_out", "slice1_out", "slice2_out"], ) return program_config