diff --git a/paddle/fluid/framework/ir/embedding_eltwise_layernorm_fuse_pass.cc b/paddle/fluid/framework/ir/embedding_eltwise_layernorm_fuse_pass.cc index ac08929db9737f0e4af39a2fd506eff947e3d9b0..8b37b31ff5cd82cb4347041a9b30b8b513725d1d 100644 --- a/paddle/fluid/framework/ir/embedding_eltwise_layernorm_fuse_pass.cc +++ b/paddle/fluid/framework/ir/embedding_eltwise_layernorm_fuse_pass.cc @@ -307,11 +307,44 @@ int EmbeddingEltwiseLayerNormFusePass::BuildFusion( std::vector ids; std::vector embs; + + auto ids0_shape = start_pattern_in_nodes[i][0].first->Var()->GetShape(); + bool flag = true; for (size_t iter = 0; iter < start_pattern_in_nodes[i].size(); ++iter) { + auto ids_shape = start_pattern_in_nodes[i][iter].first->Var()->GetShape(); + if (ids_shape.size() != ids0_shape.size()) { + VLOG(3) << "Shape check failed, ids'rank are not all equal, stop " + "embedding_eltwise_layernorm_fuse_pass."; + flag = false; + } else { + for (size_t j = 0; j < ids_shape.size(); ++j) { + if (ids_shape[j] != ids0_shape[j]) { + VLOG(3) + << "Shape check failed, ids.shape[i] are not all equal, stop " + "embedding_eltwise_layernorm_fuse_pass."; + flag = false; + } + } + } ids.push_back(start_pattern_in_nodes[i][iter].first->Name()); embs.push_back(start_pattern_in_nodes[i][iter].second->Name()); } for (size_t iter = 0; iter < js.size(); ++iter) { + auto ids_shape = inner_pattern_ins[js[iter]].first->Var()->GetShape(); + if (ids_shape.size() != ids0_shape.size()) { + VLOG(3) << "Shape check failed, ids'rank are not all equal, stop " + "embedding_eltwise_layernorm_fuse_pass."; + flag = false; + } else { + for (size_t j = 0; j < ids_shape.size(); ++j) { + if (ids_shape[j] != ids0_shape[j]) { + VLOG(3) + << "Shape check failed, ids.shape[i] are not all equal, stop " + "embedding_eltwise_layernorm_fuse_pass."; + flag = false; + } + } + } ids.push_back(inner_pattern_ins[js[iter]].first->Name()); embs.push_back(inner_pattern_ins[js[iter]].second->Name()); } @@ -322,66 +355,70 @@ int EmbeddingEltwiseLayerNormFusePass::BuildFusion( "inputs with lookup_table_v2"; return fusion_count; } + if (flag) { + OpDesc new_op_desc; + new_op_desc.SetType("fused_embedding_eltwise_layernorm"); + new_op_desc.SetInput("Ids", ids); + new_op_desc.SetInput("Embs", embs); + new_op_desc.SetInput("WordId", {ids[0]}); + new_op_desc.SetInput("PosId", {ids[1]}); + if (ids.size() > 2) { + new_op_desc.SetInput("SentId", {ids[2]}); + } - OpDesc new_op_desc; - new_op_desc.SetType("fused_embedding_eltwise_layernorm"); - new_op_desc.SetInput("Ids", ids); - new_op_desc.SetInput("Embs", embs); - new_op_desc.SetInput("WordId", {ids[0]}); - new_op_desc.SetInput("PosId", {ids[1]}); - if (ids.size() > 2) { - new_op_desc.SetInput("SentId", {ids[2]}); - } - - new_op_desc.SetInput("WordEmbedding", {embs[0]}); - new_op_desc.SetInput("PosEmbedding", {embs[1]}); - if (embs.size() > 2) { - new_op_desc.SetInput("SentEmbedding", {embs[2]}); - } + new_op_desc.SetInput("WordEmbedding", {embs[0]}); + new_op_desc.SetInput("PosEmbedding", {embs[1]}); + if (embs.size() > 2) { + new_op_desc.SetInput("SentEmbedding", {embs[2]}); + } - new_op_desc.SetInput("Bias", {end_pattern_biases[k]->Name()}); - new_op_desc.SetInput("Scale", {end_pattern_scales[k]->Name()}); - new_op_desc.SetOutput("Out", {end_pattern_out[k]->Name()}); - new_op_desc.SetAttr("epsilon", - end_patter_layernorms[k]->Op()->GetAttr("epsilon")); - - if (end_patter_layernorms[k]->Op()->HasAttr("out_threshold")) { - new_op_desc.SetAttr("enable_int8", true); - new_op_desc.SetAttr( - "out_threshold", - 
end_patter_layernorms[k]->Op()->GetAttr("out_threshold")); - } + new_op_desc.SetInput("Bias", {end_pattern_biases[k]->Name()}); + new_op_desc.SetInput("Scale", {end_pattern_scales[k]->Name()}); + new_op_desc.SetOutput("Out", {end_pattern_out[k]->Name()}); + new_op_desc.SetAttr("epsilon", + end_patter_layernorms[k]->Op()->GetAttr("epsilon")); + + if (end_patter_layernorms[k]->Op()->HasAttr("out_threshold")) { + new_op_desc.SetAttr("enable_int8", true); + new_op_desc.SetAttr( + "out_threshold", + end_patter_layernorms[k]->Op()->GetAttr("out_threshold")); + } - auto* embedding_eltwise_layernorm = graph->CreateOpNode(&new_op_desc); + auto* embedding_eltwise_layernorm = graph->CreateOpNode(&new_op_desc); - for (size_t iter = 0; iter < start_pattern_in_nodes[i].size(); ++iter) { - IR_NODE_LINK_TO(start_pattern_in_nodes[i][iter].first, - embedding_eltwise_layernorm); - IR_NODE_LINK_TO(start_pattern_in_nodes[i][iter].second, - embedding_eltwise_layernorm); - } - for (size_t iter = 0; iter < js.size(); ++iter) { - IR_NODE_LINK_TO(inner_pattern_ins[js[iter]].first, - embedding_eltwise_layernorm); - IR_NODE_LINK_TO(inner_pattern_ins[js[iter]].second, - embedding_eltwise_layernorm); - } - IR_NODE_LINK_TO(end_pattern_biases[k], embedding_eltwise_layernorm); - IR_NODE_LINK_TO(end_pattern_scales[k], embedding_eltwise_layernorm); - IR_NODE_LINK_TO(embedding_eltwise_layernorm, end_pattern_out[k]); - - // Remove unneeded nodes. - std::unordered_set marked_nodes; - marked_nodes.insert(start_pattern_remove_nodes[i].begin(), - start_pattern_remove_nodes[i].end()); - marked_nodes.insert(end_pattern_remove_nodes[k].begin(), - end_pattern_remove_nodes[k].end()); - for (size_t iter = 0; iter < js.size(); ++iter) { - marked_nodes.insert(inner_pattern_remove_nodes[js[iter]].begin(), - inner_pattern_remove_nodes[js[iter]].end()); + for (size_t iter = 0; iter < start_pattern_in_nodes[i].size(); ++iter) { + IR_NODE_LINK_TO(start_pattern_in_nodes[i][iter].first, + embedding_eltwise_layernorm); + IR_NODE_LINK_TO(start_pattern_in_nodes[i][iter].second, + embedding_eltwise_layernorm); + } + for (size_t iter = 0; iter < js.size(); ++iter) { + IR_NODE_LINK_TO(inner_pattern_ins[js[iter]].first, + embedding_eltwise_layernorm); + IR_NODE_LINK_TO(inner_pattern_ins[js[iter]].second, + embedding_eltwise_layernorm); + } + IR_NODE_LINK_TO(end_pattern_biases[k], embedding_eltwise_layernorm); + IR_NODE_LINK_TO(end_pattern_scales[k], embedding_eltwise_layernorm); + IR_NODE_LINK_TO(embedding_eltwise_layernorm, end_pattern_out[k]); + + // Remove unneeded nodes. 
+ std::unordered_set marked_nodes; + marked_nodes.insert(start_pattern_remove_nodes[i].begin(), + start_pattern_remove_nodes[i].end()); + marked_nodes.insert(end_pattern_remove_nodes[k].begin(), + end_pattern_remove_nodes[k].end()); + for (size_t iter = 0; iter < js.size(); ++iter) { + marked_nodes.insert(inner_pattern_remove_nodes[js[iter]].begin(), + inner_pattern_remove_nodes[js[iter]].end()); + } + GraphSafeRemoveNodes(graph, marked_nodes); + ++fusion_count; + } else { + VLOG(3) << "Shape check failed, stop " + "embedding_eltwise_layernorm_fuse_pass."; } - GraphSafeRemoveNodes(graph, marked_nodes); - ++fusion_count; } return fusion_count; diff --git a/paddle/fluid/framework/ir/trt_embedding_eltwise_layernorm_fuse_pass.cc b/paddle/fluid/framework/ir/trt_embedding_eltwise_layernorm_fuse_pass.cc index 8bb0c8ce67d062823a8f4274a8f49adb67511e82..80d7ade84581baa41e06f27e53cd35b84856e9dc 100644 --- a/paddle/fluid/framework/ir/trt_embedding_eltwise_layernorm_fuse_pass.cc +++ b/paddle/fluid/framework/ir/trt_embedding_eltwise_layernorm_fuse_pass.cc @@ -311,68 +311,105 @@ int TrtEmbeddingEltwiseLayerNormFusePass::BuildFusion( std::vector ids; std::vector embs; + + auto ids0_shape = start_pattern_in_nodes[i][0].first->Var()->GetShape(); + bool flag = true; for (size_t iter = 0; iter < start_pattern_in_nodes[i].size(); ++iter) { + auto ids_shape = start_pattern_in_nodes[i][iter].first->Var()->GetShape(); + if (ids_shape.size() != ids0_shape.size()) { + VLOG(3) << "Shape check failed, ids'rank are not all equal, stop " + "trt_embedding_eltwise_layernorm_fuse_pass."; + flag = false; + } else { + for (size_t j = 0; j < ids_shape.size(); ++j) { + if (ids_shape[j] != ids0_shape[j]) { + VLOG(3) + << "Shape check failed, ids.shape[i] are not all equal, stop " + "trt_embedding_eltwise_layernorm_fuse_pass."; + flag = false; + } + } + } ids.push_back(start_pattern_in_nodes[i][iter].first->Name()); embs.push_back(start_pattern_in_nodes[i][iter].second->Name()); } for (size_t iter = 0; iter < js.size(); ++iter) { + auto ids_shape = inner_pattern_ins[js[iter]].first->Var()->GetShape(); + if (ids_shape.size() != ids0_shape.size()) { + VLOG(3) << "Shape check failed, ids'rank are not all equal, stop " + "trt_embedding_eltwise_layernorm_fuse_pass."; + flag = false; + } else { + for (size_t j = 0; j < ids_shape.size(); ++j) { + if (ids_shape[j] != ids0_shape[j]) { + VLOG(3) + << "Shape check failed, ids.shape[i] are not all equal, stop " + "trt_embedding_eltwise_layernorm_fuse_pass."; + flag = false; + } + } + } ids.push_back(inner_pattern_ins[js[iter]].first->Name()); embs.push_back(inner_pattern_ins[js[iter]].second->Name()); } - OpDesc new_op_desc(end_patter_layernorms[0]->Op()->Block()); - new_op_desc.SetType("fused_embedding_eltwise_layernorm"); - new_op_desc.SetInput("Ids", ids); - new_op_desc.SetInput("Embs", embs); - if (use_varseqlen && pos_id != "" && mask_id != "") { - new_op_desc.SetInput("PosId", {pos_id}); - new_op_desc.SetInput("MaskId", {mask_id}); - } - new_op_desc.SetInput("Bias", {end_pattern_biases[k]->Name()}); - new_op_desc.SetInput("Scale", {end_pattern_scales[k]->Name()}); - new_op_desc.SetOutput("Out", {end_pattern_out[k]->Name()}); - new_op_desc.SetAttr("epsilon", - end_patter_layernorms[k]->Op()->GetAttr("epsilon")); - - if (end_patter_layernorms[k]->Op()->HasAttr("out_threshold")) { - new_op_desc.SetAttr("enable_int8", true); - new_op_desc.SetAttr( - "out_threshold", - end_patter_layernorms[k]->Op()->GetAttr("out_threshold")); - } + if (flag) { + OpDesc 
new_op_desc(end_patter_layernorms[0]->Op()->Block()); + new_op_desc.SetType("fused_embedding_eltwise_layernorm"); + new_op_desc.SetInput("Ids", ids); + new_op_desc.SetInput("Embs", embs); + if (use_varseqlen && pos_id != "" && mask_id != "") { + new_op_desc.SetInput("PosId", {pos_id}); + new_op_desc.SetInput("MaskId", {mask_id}); + } + new_op_desc.SetInput("Bias", {end_pattern_biases[k]->Name()}); + new_op_desc.SetInput("Scale", {end_pattern_scales[k]->Name()}); + new_op_desc.SetOutput("Out", {end_pattern_out[k]->Name()}); + new_op_desc.SetAttr("epsilon", + end_patter_layernorms[k]->Op()->GetAttr("epsilon")); + + if (end_patter_layernorms[k]->Op()->HasAttr("out_threshold")) { + new_op_desc.SetAttr("enable_int8", true); + new_op_desc.SetAttr( + "out_threshold", + end_patter_layernorms[k]->Op()->GetAttr("out_threshold")); + } - auto* embedding_eltwise_layernorm = graph->CreateOpNode(&new_op_desc); + auto* embedding_eltwise_layernorm = graph->CreateOpNode(&new_op_desc); - for (size_t iter = 0; iter < start_pattern_in_nodes[i].size(); ++iter) { - IR_NODE_LINK_TO(start_pattern_in_nodes[i][iter].first, - embedding_eltwise_layernorm); - IR_NODE_LINK_TO(start_pattern_in_nodes[i][iter].second, - embedding_eltwise_layernorm); - } - for (size_t iter = 0; iter < js.size(); ++iter) { - IR_NODE_LINK_TO(inner_pattern_ins[js[iter]].first, - embedding_eltwise_layernorm); - IR_NODE_LINK_TO(inner_pattern_ins[js[iter]].second, - embedding_eltwise_layernorm); - } - IR_NODE_LINK_TO(end_pattern_biases[k], embedding_eltwise_layernorm); - IR_NODE_LINK_TO(end_pattern_scales[k], embedding_eltwise_layernorm); - IR_NODE_LINK_TO(embedding_eltwise_layernorm, end_pattern_out[k]); - - // Remove unneeded nodes. - std::unordered_set marked_nodes; - marked_nodes.insert(start_pattern_remove_nodes[i].begin(), - start_pattern_remove_nodes[i].end()); - marked_nodes.insert(end_pattern_remove_nodes[k].begin(), - end_pattern_remove_nodes[k].end()); - for (size_t iter = 0; iter < js.size(); ++iter) { - marked_nodes.insert(inner_pattern_remove_nodes[js[iter]].begin(), - inner_pattern_remove_nodes[js[iter]].end()); + for (size_t iter = 0; iter < start_pattern_in_nodes[i].size(); ++iter) { + IR_NODE_LINK_TO(start_pattern_in_nodes[i][iter].first, + embedding_eltwise_layernorm); + IR_NODE_LINK_TO(start_pattern_in_nodes[i][iter].second, + embedding_eltwise_layernorm); + } + for (size_t iter = 0; iter < js.size(); ++iter) { + IR_NODE_LINK_TO(inner_pattern_ins[js[iter]].first, + embedding_eltwise_layernorm); + IR_NODE_LINK_TO(inner_pattern_ins[js[iter]].second, + embedding_eltwise_layernorm); + } + IR_NODE_LINK_TO(end_pattern_biases[k], embedding_eltwise_layernorm); + IR_NODE_LINK_TO(end_pattern_scales[k], embedding_eltwise_layernorm); + IR_NODE_LINK_TO(embedding_eltwise_layernorm, end_pattern_out[k]); + + // Remove unneeded nodes. 
+ std::unordered_set marked_nodes; + marked_nodes.insert(start_pattern_remove_nodes[i].begin(), + start_pattern_remove_nodes[i].end()); + marked_nodes.insert(end_pattern_remove_nodes[k].begin(), + end_pattern_remove_nodes[k].end()); + for (size_t iter = 0; iter < js.size(); ++iter) { + marked_nodes.insert(inner_pattern_remove_nodes[js[iter]].begin(), + inner_pattern_remove_nodes[js[iter]].end()); + } + GraphSafeRemoveNodes(graph, marked_nodes); + ++fusion_count; + } else { + VLOG(3) << "Shape check failed, stop " + "trt_embedding_eltwise_layernorm_fuse_pass."; } - GraphSafeRemoveNodes(graph, marked_nodes); - ++fusion_count; } - return fusion_count; } diff --git a/test/ir/inference/CMakeLists.txt b/test/ir/inference/CMakeLists.txt index 759c65cf187961ad61d1a63caafa4fdd288a7f97..34779cde8d0c0b49a7739713f1888d072f42c0bd 100755 --- a/test/ir/inference/CMakeLists.txt +++ b/test/ir/inference/CMakeLists.txt @@ -210,8 +210,8 @@ if(WITH_GPU AND TENSORRT_FOUND) set_tests_properties(test_merge_layernorm_fuse_pass PROPERTIES TIMEOUT 180) set_tests_properties(test_skip_merge_layernorm_fuse_pass PROPERTIES TIMEOUT 180) - set_tests_properties(test_emb_eltwise_layernorm_fuse_pass PROPERTIES TIMEOUT - 120) + set_tests_properties(test_trt_emb_eltwise_layernorm_fuse_pass + PROPERTIES TIMEOUT 180) set_tests_properties(test_fc_fuse_pass PROPERTIES TIMEOUT 240) set_tests_properties(test_reverse_roll_fuse_pass PROPERTIES TIMEOUT 120) diff --git a/test/ir/inference/test_emb_eltwise_layernorm_fuse_pass.py b/test/ir/inference/test_emb_eltwise_layernorm_fuse_pass.py index 0f0420d59336bf5b3a2ca862a0bae723804f9da9..b0a438f173b03c3f4dfff447d22b4ba5582d5af3 100644 --- a/test/ir/inference/test_emb_eltwise_layernorm_fuse_pass.py +++ b/test/ir/inference/test_emb_eltwise_layernorm_fuse_pass.py @@ -17,11 +17,9 @@ from functools import partial import hypothesis.strategies as st import numpy as np -from auto_scan_test import IgnoreReasons, PassAutoScanTest +from auto_scan_test import PassAutoScanTest from program_config import OpConfig, ProgramConfig, TensorConfig -import paddle.inference as paddle_infer - class TestEmbeddingEltwiseLayerNormFusePass(PassAutoScanTest): r''' @@ -43,48 +41,18 @@ class TestEmbeddingEltwiseLayerNormFusePass(PassAutoScanTest): ''' def is_program_valid(self, program_config: ProgramConfig) -> bool: - # is_sparse is only support False - if program_config.ops[0].attrs['is_sparse']: - return False - - # is_distributed only support False - if program_config.ops[0].attrs['is_distributed']: - return False - - # axis only support -1 and the last dim. 
- if program_config.ops[3].attrs['axis'] not in [-1, 2]: - return False - - if not ( - program_config.ops[5].attrs['epsilon'] >= 0 - and program_config.ops[5].attrs['epsilon'] <= 0.001 - ): - return False - - if program_config.ops[5].attrs['begin_norm_axis'] != 2: - return False - - # input check - if ( - program_config.weights['embedding_weight1'].shape[1] - != program_config.weights['layer_norm_scale'].shape[0] - ): - return False - return True def sample_program_config(self, draw): - is_sparse = draw(st.booleans()) - is_distributed = draw(st.booleans()) - padding_idx = draw(st.integers()) - axis = draw(st.integers(min_value=-4, max_value=4)) + padding_idx = -1 + axis = -1 op_type = draw(st.sampled_from(['lookup_table', 'lookup_table_v2'])) - epsilon = draw(st.floats(min_value=0, max_value=0.001)) + epsilon = draw(st.floats(min_value=0.0001, max_value=0.001)) # begin_norm_axis has to be 2 begin_norm_axis = 2 batch_size = draw(st.integers(min_value=1, max_value=4)) - input_dim = draw(st.sampled_from([32, 64])) - weight_size = draw(st.sampled_from([[64, 64], [64, 32]])) + input_dim = 128 + weight_size = [64, 384] def generate_input(attrs): if attrs[0]['op_type'] == 'lookup_table': @@ -102,23 +70,22 @@ class TestEmbeddingEltwiseLayerNormFusePass(PassAutoScanTest): def generate_weight1(attrs): # set embedding weight by attrs - return np.random.random(attrs['weight_size']).astype(np.float32) + return np.random.uniform(0.1, 0.1, attrs['weight_size']).astype( + np.float32 + ) def generate_weight2(attrs): - # set layernorm weight by attrs - if attrs[2]['begin_norm_axis'] == 1: - return np.random.random( - attrs[3]['input_dim'] * attrs[3]['weight_size'][1] - ).astype(np.float32) - else: - return np.random.random(attrs[3]['weight_size'][1]).astype( - np.float32 - ) + return np.random.uniform(1, 1.1, attrs[3]['weight_size'][1]).astype( + np.float32 + ) + + def generate_weight3(attrs): + return np.random.uniform( + 0.001, 0.005, attrs[3]['weight_size'][1] + ).astype(np.float32) attrs = [ { - 'is_sparse': is_sparse, - 'is_distributed': is_distributed, 'padding_idx': padding_idx, 'op_type': op_type, }, @@ -136,8 +103,6 @@ class TestEmbeddingEltwiseLayerNormFusePass(PassAutoScanTest): inputs={"Ids": ["input_data1"], "W": ["embedding_weight1"]}, outputs={"Out": ["embedding_output1"]}, attrs={ - 'is_sparse': attrs[0]['is_sparse'], - 'is_distributed': attrs[0]['is_distributed'], 'padding_idx': attrs[0]['padding_idx'], }, ) @@ -146,8 +111,6 @@ class TestEmbeddingEltwiseLayerNormFusePass(PassAutoScanTest): inputs={"Ids": ["input_data2"], "W": ["embedding_weight2"]}, outputs={"Out": ["embedding_output2"]}, attrs={ - 'is_sparse': attrs[0]['is_sparse'], - 'is_distributed': attrs[0]['is_distributed'], 'padding_idx': attrs[0]['padding_idx'], }, ) @@ -156,8 +119,6 @@ class TestEmbeddingEltwiseLayerNormFusePass(PassAutoScanTest): inputs={"Ids": ["input_data3"], "W": ["embedding_weight3"]}, outputs={"Out": ["embedding_output3"]}, attrs={ - 'is_sparse': attrs[0]['is_sparse'], - 'is_distributed': attrs[0]['is_distributed'], 'padding_idx': attrs[0]['padding_idx'], }, ) @@ -210,7 +171,7 @@ class TestEmbeddingEltwiseLayerNormFusePass(PassAutoScanTest): data_gen=partial(generate_weight1, attrs[3]) ), "layer_norm_bias": TensorConfig( - data_gen=partial(generate_weight2, attrs) + data_gen=partial(generate_weight3, attrs) ), "layer_norm_scale": TensorConfig( data_gen=partial(generate_weight2, attrs) @@ -236,81 +197,244 @@ class TestEmbeddingEltwiseLayerNormFusePass(PassAutoScanTest): # only used in gpu passes and trt 
passes. config = self.create_inference_config(use_gpu=True) yield config, ['fused_embedding_eltwise_layernorm'], (1e-5, 1e-5) - # trt static_shape - config = self.create_trt_inference_config() - config.enable_tensorrt_engine( - max_batch_size=4, - workspace_size=102400, - min_subgraph_size=0, - precision_mode=paddle_infer.PrecisionType.Half, - use_static=False, - use_calib_mode=False, + + def add_ignore_pass_case(self): + pass + + def test(self): + # this fuse need to fix, now there's no program can ran successfully + self.run_and_statis( + quant=False, + max_examples=50, + passes=["embedding_eltwise_layernorm_fuse_pass"], + min_success_num=0, ) - yield config, ['fused_embedding_eltwise_layernorm'], (1e-5, 1e-5) - # trt dynamic_shape - config = self.create_trt_inference_config() - config.enable_tensorrt_engine( - max_batch_size=4, - workspace_size=102400, - min_subgraph_size=0, - precision_mode=paddle_infer.PrecisionType.Half, - use_static=False, - use_calib_mode=False, + + +class TestEmbeddingEltwiseLayerNormFusePassNoBroadcast(PassAutoScanTest): + r''' + in_var1 emb_var in_var2 emb_var in_var3 emb_var in_var emb_var + | | | | | | | | + lookup_table lookup_table lookup_table ... lookup_table + | | | | + lkt_var lkt_var lkt_var lkt_var + \ / | ... | + elementwise_add | | + \ / | + elementwise_add | + | | + elt_var / + \ / + elementwise_add + | + layer_norm + ''' + + def is_program_valid(self, program_config: ProgramConfig) -> bool: + return True + + def sample_program_config(self, draw): + padding_idx = 0 + axis = -1 + op_type = draw(st.sampled_from(['lookup_table', 'lookup_table_v2'])) + epsilon = 0.0001 + # begin_norm_axis has to be 2 + begin_norm_axis = 2 + batch_size = 4 + input_dim = [128, 128, 1] + weight_size = [64, 384] + + def generate_input1(attrs): + if attrs[0]['op_type'] == 'lookup_table': + return np.random.randint( + 0, + attrs[3]['weight_size'][0], + size=(attrs[3]['batch_size'], attrs[3]['input_dim'][0], 1), + ).astype(np.int64) + else: + return np.random.randint( + 0, + attrs[3]['weight_size'][0], + size=(attrs[3]['batch_size'], attrs[3]['input_dim'][0]), + ).astype(np.int64) + + def generate_input2(attrs): + if attrs[0]['op_type'] == 'lookup_table': + return np.random.randint( + 0, + attrs[3]['weight_size'][0], + size=(attrs[3]['batch_size'], attrs[3]['input_dim'][1], 1), + ).astype(np.int64) + else: + return np.random.randint( + 0, + attrs[3]['weight_size'][0], + size=(attrs[3]['batch_size'], attrs[3]['input_dim'][1]), + ).astype(np.int64) + + def generate_input3(attrs): + if attrs[0]['op_type'] == 'lookup_table': + return np.random.randint( + 0, + attrs[3]['weight_size'][0], + size=(attrs[3]['batch_size'], attrs[3]['input_dim'][2], 1), + ).astype(np.int64) + else: + return np.random.randint( + 0, + attrs[3]['weight_size'][0], + size=(attrs[3]['batch_size'], attrs[3]['input_dim'][2]), + ).astype(np.int64) + + def generate_weight1(attrs): + # set embedding weight by attrs + return np.random.uniform(0.1, 0.1, attrs['weight_size']).astype( + np.float32 + ) + + def generate_weight2(attrs): + return np.random.uniform(1, 1.1, attrs[3]['weight_size'][1]).astype( + np.float32 + ) + + def generate_weight3(attrs): + return np.random.uniform( + 0.001, 0.005, attrs[3]['weight_size'][1] + ).astype(np.float32) + + attrs = [ + { + 'padding_idx': padding_idx, + 'op_type': op_type, + }, + {'axis': axis}, + {'begin_norm_axis': begin_norm_axis, 'epsilon': epsilon}, + { + 'batch_size': batch_size, + 'input_dim': input_dim, + 'weight_size': weight_size, + }, + ] + + emb_op1 = OpConfig( + 
type=attrs[0]['op_type'], + inputs={"Ids": ["input_data1"], "W": ["embedding_weight1"]}, + outputs={"Out": ["embedding_output1"]}, + attrs={ + 'padding_idx': attrs[0]['padding_idx'], + }, + ) + emb_op2 = OpConfig( + type=attrs[0]['op_type'], + inputs={"Ids": ["input_data2"], "W": ["embedding_weight2"]}, + outputs={"Out": ["embedding_output2"]}, + attrs={ + 'padding_idx': attrs[0]['padding_idx'], + }, + ) + emb_op3 = OpConfig( + type=attrs[0]['op_type'], + inputs={"Ids": ["input_data3"], "W": ["embedding_weight3"]}, + outputs={"Out": ["embedding_output3"]}, + attrs={ + 'padding_idx': attrs[0]['padding_idx'], + }, ) + add_op1 = OpConfig( + type='elementwise_add', + inputs={ + "X": [emb_op2.outputs["Out"][0]], + "Y": [emb_op3.outputs["Out"][0]], + }, + outputs={"Out": ["elementwise_add_output1"]}, + attrs={"axis": attrs[1]['axis']}, + ) + add_op2 = OpConfig( + type='elementwise_add', + inputs={ + "X": [add_op1.outputs["Out"][0]], + "Y": [emb_op1.outputs["Out"][0]], + }, + outputs={"Out": ["elementwise_add_output2"]}, + attrs={"axis": attrs[1]['axis']}, + ) + layer_norm_op = OpConfig( + type='layer_norm', + inputs={ + "X": [add_op2.outputs["Out"][0]], + "Bias": ["layer_norm_bias"], + "Scale": ["layer_norm_scale"], + }, + outputs={ + "Y": ["layer_norm_output1"], + "Mean": ["layer_norm_output2"], + "Variance": ["layer_norm_output3"], + }, + attrs={ + 'begin_norm_axis': attrs[2]['begin_norm_axis'], + 'epsilon': attrs[2]['epsilon'], + }, + ) + + program_config = ProgramConfig( + ops=[emb_op1, emb_op2, emb_op3, add_op1, add_op2, layer_norm_op], + weights={ + "embedding_weight1": TensorConfig( + data_gen=partial(generate_weight1, attrs[3]) + ), + "embedding_weight2": TensorConfig( + data_gen=partial(generate_weight1, attrs[3]) + ), + "embedding_weight3": TensorConfig( + data_gen=partial(generate_weight1, attrs[3]) + ), + "layer_norm_bias": TensorConfig( + data_gen=partial(generate_weight3, attrs) + ), + "layer_norm_scale": TensorConfig( + data_gen=partial(generate_weight2, attrs) + ), + }, + inputs={ + "input_data1": TensorConfig( + data_gen=partial(generate_input1, attrs) + ), + "input_data2": TensorConfig( + data_gen=partial(generate_input2, attrs) + ), + "input_data3": TensorConfig( + data_gen=partial(generate_input3, attrs) + ), + }, + outputs=["layer_norm_output1"], + ) + + return program_config + + def sample_predictor_configs(self, program_config): + # only used in gpu passes and trt passes. 
+ config = self.create_inference_config(use_gpu=True) if program_config.ops[0].type == 'lookup_table': - config.set_trt_dynamic_shape_info( - { - "input_data1": [1, 4, 1], - "input_data2": [1, 4, 1], - "input_data3": [1, 4, 1], - }, - { - "input_data1": [4, 512, 1], - "input_data2": [4, 512, 1], - "input_data3": [4, 512, 1], - }, - { - "input_data1": [2, 128, 1], - "input_data2": [2, 128, 1], - "input_data3": [2, 128, 1], - }, - ) + yield config, [ + 'lookup_table', + 'lookup_table', + 'lookup_table', + 'elementwise_add', + 'elementwise_add', + 'layer_norm', + ], (1e-5, 1e-5) else: - config.set_trt_dynamic_shape_info( - { - "input_data1": [1, 4], - "input_data2": [1, 4], - "input_data3": [1, 4], - }, - { - "input_data1": [4, 512], - "input_data2": [4, 512], - "input_data3": [4, 512], - }, - { - "input_data1": [2, 128], - "input_data2": [2, 128], - "input_data3": [2, 128], - }, - ) - yield config, ['fused_embedding_eltwise_layernorm'], (1e-5, 1e-5) + yield config, [ + 'lookup_table_v2', + 'lookup_table_v2', + 'lookup_table_v2', + 'elementwise_add', + 'elementwise_add', + 'layer_norm', + ], (1e-5, 1e-5) def add_ignore_pass_case(self): - def teller1(program_config, predictor_config): - if ( - program_config.ops[3].attrs['axis'] in [-1, 2] - and program_config.ops[5].attrs['begin_norm_axis'] == 2 - and program_config.weights['embedding_weight1'].shape - in [(64, 32), (64, 64)] - ): - return True - return False - - self.add_ignore_check_case( - teller1, - IgnoreReasons.PASS_ACCURACY_ERROR, - "The pass output has diff in a specific case. We need to fix it as soon as possible.", - ) + pass def test(self): # this fuse need to fix, now there's no program can ran successfully diff --git a/test/ir/inference/test_trt_emb_eltwise_layernorm_fuse_pass.py b/test/ir/inference/test_trt_emb_eltwise_layernorm_fuse_pass.py new file mode 100644 index 0000000000000000000000000000000000000000..068c480d360d4ce457fbd6803591ed2542256a21 --- /dev/null +++ b/test/ir/inference/test_trt_emb_eltwise_layernorm_fuse_pass.py @@ -0,0 +1,544 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +from functools import partial + +import hypothesis.strategies as st +import numpy as np +from auto_scan_test import PassAutoScanTest +from program_config import OpConfig, ProgramConfig, TensorConfig + +import paddle.inference as paddle_infer + + +class TestEmbeddingEltwiseLayerNormFusePass(PassAutoScanTest): + r''' + in_var1 emb_var in_var2 emb_var in_var3 emb_var in_var emb_var + | | | | | | | | + lookup_table lookup_table lookup_table ... lookup_table + | | | | + lkt_var lkt_var lkt_var lkt_var + \ / | ... 
| + elementwise_add | | + \ / | + elementwise_add | + | | + elt_var / + \ / + elementwise_add + | + layer_norm + ''' + + def is_program_valid(self, program_config: ProgramConfig) -> bool: + return True + + def sample_program_config(self, draw): + padding_idx = -1 + axis = -1 + op_type = draw(st.sampled_from(['lookup_table', 'lookup_table_v2'])) + epsilon = draw(st.floats(min_value=0.0001, max_value=0.001)) + # begin_norm_axis has to be 2 + begin_norm_axis = 2 + batch_size = draw(st.integers(min_value=1, max_value=4)) + input_dim = 128 + weight_size = [64, 384] + + def generate_input(attrs): + if attrs[0]['op_type'] == 'lookup_table': + return np.random.randint( + 0, + attrs[3]['weight_size'][0], + size=(attrs[3]['batch_size'], attrs[3]['input_dim'], 1), + ).astype(np.int64) + else: + return np.random.randint( + 0, + attrs[3]['weight_size'][0], + size=(attrs[3]['batch_size'], attrs[3]['input_dim']), + ).astype(np.int64) + + def generate_weight1(attrs): + # set embedding weight by attrs + return np.random.uniform(0.05, 0.05, attrs['weight_size']).astype( + np.float32 + ) + + def generate_weight2(attrs): + return np.random.uniform(1, 1.1, attrs[3]['weight_size'][1]).astype( + np.float32 + ) + + def generate_weight3(attrs): + return np.random.uniform( + 0.001, 0.005, attrs[3]['weight_size'][1] + ).astype(np.float32) + + attrs = [ + { + 'padding_idx': padding_idx, + 'op_type': op_type, + }, + {'axis': axis}, + {'begin_norm_axis': begin_norm_axis, 'epsilon': epsilon}, + { + 'batch_size': batch_size, + 'input_dim': input_dim, + 'weight_size': weight_size, + }, + ] + + emb_op1 = OpConfig( + type=attrs[0]['op_type'], + inputs={"Ids": ["input_data1"], "W": ["embedding_weight1"]}, + outputs={"Out": ["embedding_output1"]}, + attrs={ + 'padding_idx': attrs[0]['padding_idx'], + }, + ) + emb_op2 = OpConfig( + type=attrs[0]['op_type'], + inputs={"Ids": ["input_data2"], "W": ["embedding_weight2"]}, + outputs={"Out": ["embedding_output2"]}, + attrs={ + 'padding_idx': attrs[0]['padding_idx'], + }, + ) + emb_op3 = OpConfig( + type=attrs[0]['op_type'], + inputs={"Ids": ["input_data3"], "W": ["embedding_weight3"]}, + outputs={"Out": ["embedding_output3"]}, + attrs={ + 'padding_idx': attrs[0]['padding_idx'], + }, + ) + add_op1 = OpConfig( + type='elementwise_add', + inputs={ + "X": [emb_op2.outputs["Out"][0]], + "Y": [emb_op3.outputs["Out"][0]], + }, + outputs={"Out": ["elementwise_add_output1"]}, + attrs={"axis": attrs[1]['axis']}, + ) + add_op2 = OpConfig( + type='elementwise_add', + inputs={ + "X": [add_op1.outputs["Out"][0]], + "Y": [emb_op1.outputs["Out"][0]], + }, + outputs={"Out": ["elementwise_add_output2"]}, + attrs={"axis": attrs[1]['axis']}, + ) + layer_norm_op = OpConfig( + type='layer_norm', + inputs={ + "X": [add_op2.outputs["Out"][0]], + "Bias": ["layer_norm_bias"], + "Scale": ["layer_norm_scale"], + }, + outputs={ + "Y": ["layer_norm_output1"], + "Mean": ["layer_norm_output2"], + "Variance": ["layer_norm_output3"], + }, + attrs={ + 'begin_norm_axis': attrs[2]['begin_norm_axis'], + 'epsilon': attrs[2]['epsilon'], + }, + ) + + program_config = ProgramConfig( + ops=[emb_op1, emb_op2, emb_op3, add_op1, add_op2, layer_norm_op], + weights={ + "embedding_weight1": TensorConfig( + data_gen=partial(generate_weight1, attrs[3]) + ), + "embedding_weight2": TensorConfig( + data_gen=partial(generate_weight1, attrs[3]) + ), + "embedding_weight3": TensorConfig( + data_gen=partial(generate_weight1, attrs[3]) + ), + "layer_norm_bias": TensorConfig( + data_gen=partial(generate_weight3, attrs) + ), + 
"layer_norm_scale": TensorConfig( + data_gen=partial(generate_weight2, attrs) + ), + }, + inputs={ + "input_data1": TensorConfig( + data_gen=partial(generate_input, attrs) + ), + "input_data2": TensorConfig( + data_gen=partial(generate_input, attrs) + ), + "input_data3": TensorConfig( + data_gen=partial(generate_input, attrs) + ), + }, + outputs=["layer_norm_output1"], + ) + + return program_config + + def sample_predictor_configs(self, program_config): + # trt dynamic_shape + config = self.create_trt_inference_config() + config.enable_tensorrt_engine( + max_batch_size=4, + workspace_size=1 << 30, + min_subgraph_size=0, + precision_mode=paddle_infer.PrecisionType.Half, + use_static=False, + use_calib_mode=False, + ) + if program_config.ops[0].type == 'lookup_table': + config.set_trt_dynamic_shape_info( + { + "input_data1": [1, 128, 1], + "input_data2": [1, 128, 1], + "input_data3": [1, 128, 1], + }, + { + "input_data1": [4, 128, 1], + "input_data2": [4, 128, 1], + "input_data3": [4, 128, 1], + }, + { + "input_data1": [2, 128, 1], + "input_data2": [2, 128, 1], + "input_data3": [2, 128, 1], + }, + ) + else: + config.set_trt_dynamic_shape_info( + { + "input_data1": [1, 128], + "input_data2": [1, 128], + "input_data3": [1, 128], + }, + { + "input_data1": [4, 128], + "input_data2": [4, 128], + "input_data3": [4, 128], + }, + { + "input_data1": [2, 128], + "input_data2": [2, 128], + "input_data3": [2, 128], + }, + ) + yield config, ['fused_embedding_eltwise_layernorm'], (1e-5, 1e-5) + + def add_ignore_pass_case(self): + pass + + def test(self): + # this fuse need to fix, now there's no program can ran successfully + self.run_and_statis( + quant=False, + max_examples=50, + passes=["trt_embedding_eltwise_layernorm_fuse_pass"], + min_success_num=0, + ) + + +class TestEmbeddingEltwiseLayerNormFusePassNoBroadcast(PassAutoScanTest): + r''' + in_var1 emb_var in_var2 emb_var in_var3 emb_var in_var emb_var + | | | | | | | | + lookup_table lookup_table lookup_table ... lookup_table + | | | | + lkt_var lkt_var lkt_var lkt_var + \ / | ... 
| + elementwise_add | | + \ / | + elementwise_add | + | | + elt_var / + \ / + elementwise_add + | + layer_norm + ''' + + def is_program_valid(self, program_config: ProgramConfig) -> bool: + return True + + def sample_program_config(self, draw): + padding_idx = -1 + axis = -1 + op_type = draw(st.sampled_from(['lookup_table', 'lookup_table_v2'])) + epsilon = 0.0001 + # begin_norm_axis has to be 2 + begin_norm_axis = 2 + batch_size = 4 + input_dim = [128, 128, 1] + weight_size = [64, 384] + + def generate_input1(attrs): + if attrs[0]['op_type'] == 'lookup_table': + return np.random.randint( + 0, + attrs[3]['weight_size'][0], + size=(attrs[3]['batch_size'], attrs[3]['input_dim'][0], 1), + ).astype(np.int64) + else: + return np.random.randint( + 0, + attrs[3]['weight_size'][0], + size=(attrs[3]['batch_size'], attrs[3]['input_dim'][0]), + ).astype(np.int64) + + def generate_input2(attrs): + if attrs[0]['op_type'] == 'lookup_table': + return np.random.randint( + 0, + attrs[3]['weight_size'][0], + size=(attrs[3]['batch_size'], attrs[3]['input_dim'][1], 1), + ).astype(np.int64) + else: + return np.random.randint( + 0, + attrs[3]['weight_size'][0], + size=(attrs[3]['batch_size'], attrs[3]['input_dim'][1]), + ).astype(np.int64) + + def generate_input3(attrs): + if attrs[0]['op_type'] == 'lookup_table': + return np.random.randint( + 0, + attrs[3]['weight_size'][0], + size=(attrs[3]['batch_size'], attrs[3]['input_dim'][2], 1), + ).astype(np.int64) + else: + return np.random.randint( + 0, + attrs[3]['weight_size'][0], + size=(attrs[3]['batch_size'], attrs[3]['input_dim'][2]), + ).astype(np.int64) + + def generate_weight1(attrs): + # set embedding weight by attrs + return np.random.uniform(0.05, 0.1, attrs['weight_size']).astype( + np.float32 + ) + + def generate_weight2(attrs): + return np.random.uniform(1, 1.1, attrs[3]['weight_size'][1]).astype( + np.float32 + ) + + def generate_weight3(attrs): + return np.random.uniform( + 0.001, 0.005, attrs[3]['weight_size'][1] + ).astype(np.float32) + + attrs = [ + { + 'padding_idx': padding_idx, + 'op_type': op_type, + }, + {'axis': axis}, + {'begin_norm_axis': begin_norm_axis, 'epsilon': epsilon}, + { + 'batch_size': batch_size, + 'input_dim': input_dim, + 'weight_size': weight_size, + }, + ] + + emb_op1 = OpConfig( + type=attrs[0]['op_type'], + inputs={"Ids": ["input_data1"], "W": ["embedding_weight1"]}, + outputs={"Out": ["embedding_output1"]}, + attrs={ + 'padding_idx': attrs[0]['padding_idx'], + }, + ) + emb_op2 = OpConfig( + type=attrs[0]['op_type'], + inputs={"Ids": ["input_data2"], "W": ["embedding_weight2"]}, + outputs={"Out": ["embedding_output2"]}, + attrs={ + 'padding_idx': attrs[0]['padding_idx'], + }, + ) + emb_op3 = OpConfig( + type=attrs[0]['op_type'], + inputs={"Ids": ["input_data3"], "W": ["embedding_weight3"]}, + outputs={"Out": ["embedding_output3"]}, + attrs={ + 'padding_idx': attrs[0]['padding_idx'], + }, + ) + add_op1 = OpConfig( + type='elementwise_add', + inputs={ + "X": [emb_op2.outputs["Out"][0]], + "Y": [emb_op3.outputs["Out"][0]], + }, + outputs={"Out": ["elementwise_add_output1"]}, + attrs={"axis": attrs[1]['axis']}, + ) + add_op2 = OpConfig( + type='elementwise_add', + inputs={ + "X": [add_op1.outputs["Out"][0]], + "Y": [emb_op1.outputs["Out"][0]], + }, + outputs={"Out": ["elementwise_add_output2"]}, + attrs={"axis": attrs[1]['axis']}, + ) + layer_norm_op = OpConfig( + type='layer_norm', + inputs={ + "X": [add_op2.outputs["Out"][0]], + "Bias": ["layer_norm_bias"], + "Scale": ["layer_norm_scale"], + }, + outputs={ + "Y": 
["layer_norm_output1"], + "Mean": ["layer_norm_output2"], + "Variance": ["layer_norm_output3"], + }, + attrs={ + 'begin_norm_axis': attrs[2]['begin_norm_axis'], + 'epsilon': attrs[2]['epsilon'], + }, + ) + + program_config = ProgramConfig( + ops=[emb_op1, emb_op2, emb_op3, add_op1, add_op2, layer_norm_op], + weights={ + "embedding_weight1": TensorConfig( + data_gen=partial(generate_weight1, attrs[3]) + ), + "embedding_weight2": TensorConfig( + data_gen=partial(generate_weight1, attrs[3]) + ), + "embedding_weight3": TensorConfig( + data_gen=partial(generate_weight1, attrs[3]) + ), + "layer_norm_bias": TensorConfig( + data_gen=partial(generate_weight3, attrs) + ), + "layer_norm_scale": TensorConfig( + data_gen=partial(generate_weight2, attrs) + ), + }, + inputs={ + "input_data1": TensorConfig( + data_gen=partial(generate_input1, attrs) + ), + "input_data2": TensorConfig( + data_gen=partial(generate_input2, attrs) + ), + "input_data3": TensorConfig( + data_gen=partial(generate_input3, attrs) + ), + }, + outputs=["layer_norm_output1"], + ) + + return program_config + + def sample_predictor_configs(self, program_config): + # trt dynamic_shape + config = self.create_trt_inference_config() + config.enable_tensorrt_engine( + max_batch_size=4, + workspace_size=1 << 30, + min_subgraph_size=0, + precision_mode=paddle_infer.PrecisionType.Half, + use_static=False, + use_calib_mode=False, + ) + if program_config.ops[0].type == 'lookup_table': + config.set_trt_dynamic_shape_info( + { + "embedding_output1": [1, 128, 384], + "embedding_output2": [1, 128, 384], + "embedding_output3": [1, 1, 384], + }, + { + "embedding_output1": [4, 128, 384], + "embedding_output2": [4, 128, 384], + "embedding_output3": [4, 1, 384], + }, + { + "embedding_output1": [2, 128, 384], + "embedding_output2": [2, 128, 384], + "embedding_output3": [2, 1, 384], + }, + ) + config.exp_disable_tensorrt_ops(["lookup_table"]) + config.delete_pass("trt_skip_layernorm_fuse_pass") + config.delete_pass("preln_residual_bias_fuse_pass") + yield config, [ + 'lookup_table', + 'lookup_table', + 'lookup_table', + 'elementwise_add', + 'elementwise_add', + 'layer_norm', + ], (1e-5, 1e-5) + else: + config.set_trt_dynamic_shape_info( + { + "embedding_output1": [1, 128, 384], + "embedding_output2": [1, 128, 384], + "embedding_output3": [1, 1, 384], + }, + { + "embedding_output1": [4, 128, 384], + "embedding_output2": [4, 128, 384], + "embedding_output3": [4, 1, 384], + }, + { + "embedding_output1": [2, 128, 384], + "embedding_output2": [2, 128, 384], + "embedding_output3": [2, 1, 384], + }, + ) + config.exp_disable_tensorrt_ops(["lookup_table_v2"]) + config.delete_pass("trt_skip_layernorm_fuse_pass") + config.delete_pass("preln_residual_bias_fuse_pass") + yield config, [ + 'lookup_table_v2', + 'lookup_table_v2', + 'lookup_table_v2', + 'elementwise_add', + 'elementwise_add', + 'layer_norm', + ], (1e-5, 1e-5) + + def add_ignore_pass_case(self): + pass + + def test(self): + # this fuse need to fix, now there's no program can ran successfully + self.run_and_statis( + quant=False, + max_examples=50, + passes=["trt_embedding_eltwise_layernorm_fuse_pass"], + min_success_num=0, + ) + + +if __name__ == "__main__": + unittest.main()