diff --git a/paddleslim/auto_compression/transformer_pruner.py b/paddleslim/auto_compression/transformer_pruner.py
index 8371bfab42651d1c7d5702329e934d095effe347..0cb3011e39aa81791e54d54cc4b86bb82e94aa3c 100644
--- a/paddleslim/auto_compression/transformer_pruner.py
+++ b/paddleslim/auto_compression/transformer_pruner.py
@@ -49,6 +49,22 @@ def find_next_ops(block, var_name):
     return res_ops
 
 
+def find_op_itself(block, var_name, op_type):
+    """
+    Find ops itself from block by the output variable.
+    """
+    res_ops = []
+    for op in block.ops:
+        if var_name in op.output_arg_names:
+            if op.type == op_type:
+                res_ops.append(op)
+    if len(res_ops) > 1:
+        _logger.error(
+            'the function of find_op_itself has more than one op, maybe something wrong.'
+        )
+    return res_ops
+
+
 def insert_eltmul_op(block, op, head_mask, block_num):
     """ Insert elementwise mul op to matmul input_mask and head_mask to program"""
     op_idx = block.ops.index(op)
@@ -305,6 +321,8 @@ class TransformerPruner:
             next_op = find_next_ops(block, var_name)
             if next_op[0].type == 'dropout':
                 op = next_op[0]
+            else:  ### find op itself
+                op = find_op_itself(block, var_name, op.type)[0]
             insert_eltmul_op(block, op, head_mask, block_num)
             logits = block.var(fetch_list[0])
             labels = block.create_var(
diff --git a/paddleslim/auto_compression/utils/predict.py b/paddleslim/auto_compression/utils/predict.py
index a6e5d219c6ecdb8dacbb019c216f43634bbe4512..af1a09b9e7fb1373699e817da428fa9d526c60c1 100644
--- a/paddleslim/auto_compression/utils/predict.py
+++ b/paddleslim/auto_compression/utils/predict.py
@@ -19,7 +19,7 @@ def with_variable_shape(model_dir, model_filename=None, params_filename=None):
     paddle.enable_static()
     exe = paddle.static.Executor(paddle.CPUPlace())
     [inference_program, feed_target_names, fetch_targets] = (
-        paddle.static.load_inference_model(
+        paddle.fluid.io.load_inference_model(
             model_dir,
             exe,
             model_filename=model_filename,