From 6fb34e743e19510dd99d15f13bb99efc737cd362 Mon Sep 17 00:00:00 2001
From: Wang Bojun <105858416+wwbitejotunn@users.noreply.github.com>
Date: Fri, 19 Aug 2022 18:58:47 +0800
Subject: [PATCH] fix layernormTrt meanVar alloc bug (#45255)

* fix layernormTrt meanVar alloc bug
---
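Note: layer_norm writes one mean/variance value per un-normalized row,
i.e. the product of the input dims before begin_norm_axis, but the
pre-patch converter sized those buffers with the product of the dims from
begin_norm_axis onward. A minimal NumPy sketch of the two sizings, assuming
the new test's input shape [2, 64, 3, 3]; the helper names are illustrative
and not code from this patch:

    import numpy as np

    def pre_patch_count(shape, begin_norm_axis):
        # Old converter: product of the normalized dims.
        n = 1
        for d in shape[begin_norm_axis:]:
            n *= d
        return n

    def fixed_count(shape, begin_norm_axis):
        # Patched converter (static shape): product of the dims before the
        # axis, i.e. how many statistics layer_norm actually writes.
        n = 1
        for d in shape[:begin_norm_axis]:
            n *= d
        return n

    shape = [2, 64, 3, 3]            # the new unit test's input shape
    for axis in [1, 2]:              # the axes the test treats as valid
        x = np.ones(shape, dtype=np.float32)
        mean = x.reshape(fixed_count(shape, axis), -1).mean(axis=1)
        assert mean.size == fixed_count(shape, axis)
        print(axis, pre_patch_count(shape, axis), fixed_count(shape, axis))

For begin_norm_axis = 2 the old sizing allocates 9 elements where 128 are
written, which is the under-allocation this patch fixes.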
 .../tensorrt/convert/layer_norm_op.cc         |  22 ++--
 .../tensorrt/plugin/layer_norm_op_plugin.cu   |   5 +
 .../inference/test_trt_convert_layer_norm.py  | 113 ++++++++++++++++++
 3 files changed, 130 insertions(+), 10 deletions(-)

diff --git a/paddle/fluid/inference/tensorrt/convert/layer_norm_op.cc b/paddle/fluid/inference/tensorrt/convert/layer_norm_op.cc
index 359ea2b343d..54017666a77 100644
--- a/paddle/fluid/inference/tensorrt/convert/layer_norm_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/layer_norm_op.cc
@@ -56,12 +56,14 @@ class LayerNormOpConverter : public OpConverter {
 
     nvinfer1::ILayer* layernorm_layer = nullptr;
     if (engine_->with_dynamic_shape()) {
-      int input_num = 1;
-      for (int i = begin_norm_axis; i < X->getDimensions().nbDims; i++) {
-        input_num *= X->getDimensions().d[i];
+      int statis_num = 1;
+      // For dynamic shape, the batch num will be taken into account
+      // at plugin runtime.
+      for (int i = 1; i < begin_norm_axis; i++) {
+        statis_num *= X->getDimensions().d[i];
       }
-      std::vector<int64_t> mean_shape{input_num};
-      std::vector<int64_t> variance_shape{input_num};
+      std::vector<int64_t> mean_shape{statis_num};
+      std::vector<int64_t> variance_shape{statis_num};
       plugin::LayerNormPluginDynamic* plugin =
           new plugin::LayerNormPluginDynamic(
               static_cast<const float*>(bias_weight.get().values),
@@ -74,12 +76,12 @@ class LayerNormOpConverter : public OpConverter {
               variance_shape);
       layernorm_layer = engine_->AddDynamicPlugin(&X, 1, plugin);
     } else {
-      int input_num = 1;
-      for (int i = begin_norm_axis - 1; i < X->getDimensions().nbDims; i++) {
-        input_num *= X->getDimensions().d[i];
+      int statis_num = 1;
+      for (int i = 0; i < begin_norm_axis; i++) {
+        statis_num *= X->getDimensions().d[i];
       }
-      std::vector<int64_t> mean_shape{input_num};
-      std::vector<int64_t> variance_shape{input_num};
+      std::vector<int64_t> mean_shape{statis_num};
+      std::vector<int64_t> variance_shape{statis_num};
       plugin::LayerNormPlugin* plugin = new plugin::LayerNormPlugin(
           static_cast<const float*>(bias_weight.get().values),
           bias_weight.get().count,
diff --git a/paddle/fluid/inference/tensorrt/plugin/layer_norm_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/layer_norm_op_plugin.cu
index 9cbe6abe19d..9b0fa45c337 100644
--- a/paddle/fluid/inference/tensorrt/plugin/layer_norm_op_plugin.cu
+++ b/paddle/fluid/inference/tensorrt/plugin/layer_norm_op_plugin.cu
@@ -175,6 +175,11 @@ int LayerNormPluginDynamic::enqueue(
   for (int i = 0; i < input_dims.nbDims; i++) {
     input_shape.push_back(input_dims.d[i]);
   }
+  // In dynamic shape,
+  // the batch num should be included in the mean/variance shape.
+  mean_shape_[0] *= input_dims.d[0];
+  variance_shape_[0] *= input_dims.d[0];
+
   const auto input_ddim = phi::make_ddim(input_shape);
   auto matrix_dim = phi::flatten_to_2d(input_ddim, begin_norm_axis);
   int feature_size = static_cast<int>(matrix_dim[1]);
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_layer_norm.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_layer_norm.py
index 16c95bff5e3..4eb18c2de34 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_layer_norm.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_layer_norm.py
@@ -140,5 +140,118 @@ class TrtConvertLayerNormTest(TrtLayerAutoScanTest):
         self.run_test()
 
 
+class TrtConvertLayerNormTest_2(TrtLayerAutoScanTest):
+
+    def is_program_valid(self, program_config: ProgramConfig) -> bool:
+        inputs = program_config.inputs
+        weights = program_config.weights
+        attrs = [
+            program_config.ops[i].attrs for i in range(len(program_config.ops))
+        ]
+
+        if attrs[0]['epsilon'] < 0 or attrs[0]['epsilon'] > 0.001:
+            return False
+        if attrs[0]['begin_norm_axis'] <= 0 or attrs[0]['begin_norm_axis'] >= (
+                len(inputs['input_data'].shape) - 1):
+            return False
+
+        return True
+
+    def sample_program_configs(self):
+
+        def generate_input1(attrs: List[Dict[str, Any]], shape_input):
+            return np.ones(shape_input).astype(np.float32)
+
+        def generate_input2(attrs: List[Dict[str, Any]], shape_input):
+            begin = attrs[0]["begin_norm_axis"]
+            num = 1
+            for x in range(begin, len(shape_input)):
+                num *= shape_input[x]
+            return np.ones([num]).astype(np.float32)
+
+        for epsilon in [0.0005, -1, 1]:
+            for begin_norm_axis in [1, 0, -1, 2, 3]:
+                dics = [{
+                    "epsilon": epsilon,
+                    "begin_norm_axis": begin_norm_axis
+                }, {}]
+
+                ops_config = [{
+                    "op_type": "layer_norm",
+                    "op_inputs": {
+                        "X": ["input_data"],
+                        "Scale": ["scale_data"],
+                        "Bias": ["bias_data"]
+                    },
+                    "op_outputs": {
+                        "Y": ["y_data"],
+                        "Mean": ["saved_mean_data"],
+                        "Variance": ["saved_variance_data"]
+                    },
+                    "op_attrs": dics[0]
+                }]
+                ops = self.generate_op_config(ops_config)
+                shape_input = [2, 64, 3, 3]
+                program_config = ProgramConfig(
+                    ops=ops,
+                    weights={
+                        "bias_data":
+                        TensorConfig(data_gen=partial(generate_input2, dics,
+                                                      shape_input)),
+                        "scale_data":
+                        TensorConfig(data_gen=partial(generate_input2, dics,
+                                                      shape_input))
+                    },
+                    inputs={
+                        "input_data":
+                        TensorConfig(data_gen=partial(generate_input1, dics,
+                                                      shape_input))
+                    },
+                    outputs=["y_data"])
+
+                yield program_config
+
+    def sample_predictor_configs(
+            self, program_config) -> (paddle_infer.Config, List[int], float):
+
+        def generate_dynamic_shape(attrs):
+            self.dynamic_shape.min_input_shape = {"input_data": [1, 64, 3, 3]}
+            self.dynamic_shape.max_input_shape = {"input_data": [4, 64, 3, 3]}
+            self.dynamic_shape.opt_input_shape = {"input_data": [2, 64, 3, 3]}
+
+        def clear_dynamic_shape():
+            self.dynamic_shape.min_input_shape = {}
+            self.dynamic_shape.max_input_shape = {}
+            self.dynamic_shape.opt_input_shape = {}
+
+        def generate_trt_nodes_num(attrs, dynamic_shape):
+            return 1, 2
+
+        attrs = [
+            program_config.ops[i].attrs for i in range(len(program_config.ops))
+        ]
+
+        # for static_shape
+        clear_dynamic_shape()
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, False), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, False), 1e-2
+
+        # for dynamic_shape
+        generate_dynamic_shape(attrs)
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, True), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, True), 1e-2
+
+    def test(self):
+        self.run_test()
+
+
 if __name__ == "__main__":
     unittest.main()
--
GitLab
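A note on the dynamic-shape path above: because the batch dim is unknown at
build time, the converter sizes the buffers from dims 1..begin_norm_axis-1
only, and enqueue() multiplies the runtime batch back in. A small
self-contained check of that arithmetic against the test's shape profile;
the helper names are illustrative, not code from this patch:

    def build_time_count(shape, begin_norm_axis):
        # Mirrors the dynamic-shape branch: dim 0 (batch) is skipped
        # because it is not known when the engine is built.
        n = 1
        for i in range(1, begin_norm_axis):
            n *= shape[i]
        return n

    def full_row_count(shape, begin_norm_axis):
        # Statistics layer_norm actually produces: one per un-normalized row.
        n = 1
        for i in range(begin_norm_axis):
            n *= shape[i]
        return n

    opt_shape = [2, 64, 3, 3]        # the test's opt profile shape
    for batch in [1, 2, 4]:          # the test's min/opt/max batch sizes
        runtime_shape = [batch] + opt_shape[1:]
        for axis in [1, 2]:          # axes the test treats as valid
            at_enqueue = build_time_count(opt_shape, axis) * batch
            assert at_enqueue == full_row_count(runtime_shape, axis)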