Unverified · Commit 6fb34e74, authored by Wang Bojun, committed by GitHub

fix layernormTrt meanVar alloc bug (#45255)

* fix layernormTrt meanVar alloc bug
Parent 1c4134f6
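
In brief: layer_norm flattens its input to a 2-D [outer, inner] matrix at begin_norm_axis and computes one mean and one variance per row, so the Mean/Variance buffers must hold prod(dims[0:begin_norm_axis]) elements. The old converter instead multiplied the trailing (normalized) dimensions: for a [2, 64, 3, 3] input with begin_norm_axis = 2 it allocated 3 * 3 = 9 entries where 2 * 64 = 128 are needed, an under-allocation that can overflow the buffers at runtime. A minimal sketch of the intended count (StatisticCount is an illustrative helper, not part of the Paddle source):

#include <cstdint>
#include <vector>

// One mean/variance entry is needed per row of the flattened
// [outer, inner] matrix, i.e. per element of the outer dimension.
int64_t StatisticCount(const std::vector<int64_t>& dims, int begin_norm_axis) {
  int64_t outer = 1;
  for (int i = 0; i < begin_norm_axis; ++i) outer *= dims[i];
  return outer;  // e.g. {2, 64, 3, 3} with begin_norm_axis = 2 -> 128
}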
@@ -56,12 +56,14 @@ class LayerNormOpConverter : public OpConverter {
   nvinfer1::ILayer* layernorm_layer = nullptr;
   if (engine_->with_dynamic_shape()) {
-    int input_num = 1;
-    for (int i = begin_norm_axis; i < X->getDimensions().nbDims; i++) {
-      input_num *= X->getDimensions().d[i];
+    int statis_num = 1;
+    // For dynamic shape,
+    // the batch num will be taken into account in plugin runtime.
+    for (int i = 1; i < begin_norm_axis; i++) {
+      statis_num *= X->getDimensions().d[i];
     }
-    std::vector<int64_t> mean_shape{input_num};
-    std::vector<int64_t> variance_shape{input_num};
+    std::vector<int64_t> mean_shape{statis_num};
+    std::vector<int64_t> variance_shape{statis_num};
     plugin::LayerNormPluginDynamic* plugin =
         new plugin::LayerNormPluginDynamic(
             static_cast<const float*>(bias_weight.get().values),
@@ -74,12 +76,12 @@ class LayerNormOpConverter : public OpConverter {
             variance_shape);
     layernorm_layer = engine_->AddDynamicPlugin(&X, 1, plugin);
   } else {
-    int input_num = 1;
-    for (int i = begin_norm_axis - 1; i < X->getDimensions().nbDims; i++) {
-      input_num *= X->getDimensions().d[i];
+    int statis_num = 1;
+    for (int i = 0; i < begin_norm_axis; i++) {
+      statis_num *= X->getDimensions().d[i];
     }
-    std::vector<int64_t> mean_shape{input_num};
-    std::vector<int64_t> variance_shape{input_num};
+    std::vector<int64_t> mean_shape{statis_num};
+    std::vector<int64_t> variance_shape{statis_num};
     plugin::LayerNormPlugin* plugin = new plugin::LayerNormPlugin(
         static_cast<const float*>(bias_weight.get().values),
         bias_weight.get().count,
...
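
Why the two branches differ: in dynamic-shape mode the batch dimension d[0] is unknown (-1) at conversion time, so the converter multiplies only dims 1 through begin_norm_axis - 1 and leaves the batch to the plugin's enqueue; in static-shape mode every dimension up to begin_norm_axis is known up front. A hedged sketch of that split (the helper below restates the diff; it is not the Paddle source):

#include <cstdint>
#include <vector>

// Build-time statistic count for the two engine modes. In dynamic-shape
// mode dims[0] is the unknown batch and is skipped here; the plugin
// multiplies it back in once the real input dims are known.
int64_t BuildTimeStatisNum(const std::vector<int64_t>& dims,
                           int begin_norm_axis, bool dynamic_shape) {
  int64_t statis_num = 1;
  for (int i = dynamic_shape ? 1 : 0; i < begin_norm_axis; ++i) {
    statis_num *= dims[i];
  }
  return statis_num;
}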
@@ -175,6 +175,11 @@ int LayerNormPluginDynamic::enqueue(
   for (int i = 0; i < input_dims.nbDims; i++) {
     input_shape.push_back(input_dims.d[i]);
   }
+  // in dynamic shape
+  // the batch num should be involved in mean/variance shape
+  mean_shape_[0] *= input_dims.d[0];
+  variance_shape_[0] *= input_dims.d[0];
   const auto input_ddim = phi::make_ddim(input_shape);
   auto matrix_dim = phi::flatten_to_2d(input_ddim, begin_norm_axis);
   int feature_size = static_cast<int>(matrix_dim[1]);
...
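
On the runtime side, enqueue() scales the statistic shapes by the actual batch size before the input is flattened to 2-D. A simplified sketch of the intent, with illustrative scaffolding around the two lines from the diff:

#include <cstdint>
#include <vector>

struct LayerNormStatsDemo {  // illustrative stand-in for the plugin state
  std::vector<int64_t> mean_shape_{1};      // build-time prod(dims[1:axis])
  std::vector<int64_t> variance_shape_{1};

  // Fold the batch dimension, known only at enqueue time, into the
  // statistic shapes that were created without it at build time.
  void FoldInBatch(int64_t batch) {
    mean_shape_[0] *= batch;
    variance_shape_[0] *= batch;
  }
};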
@@ -140,5 +140,123 @@ class TrtConvertLayerNormTest(TrtLayerAutoScanTest):
         self.run_test()
 
 
+class TrtConvertLayerNormTest_2(TrtLayerAutoScanTest):
+
+    def is_program_valid(self, program_config: ProgramConfig) -> bool:
+        inputs = program_config.inputs
+        weights = program_config.weights
+        attrs = [
+            program_config.ops[i].attrs for i in range(len(program_config.ops))
+        ]
+        if attrs[0]['epsilon'] < 0 or attrs[0]['epsilon'] > 0.001:
+            return False
+        if attrs[0]['begin_norm_axis'] <= 0 or attrs[0]['begin_norm_axis'] >= (
+                len(inputs['input_data'].shape) - 1):
+            return False
+        return True
+
+    def sample_program_configs(self):
+
+        def generate_input1(attrs: List[Dict[str, Any]], shape_input):
+            return np.ones(shape_input).astype(np.float32)
+
+        def generate_input2(attrs: List[Dict[str, Any]], shape_input):
+            begin = attrs[0]["begin_norm_axis"]
+            sum = 1
+            for x in range(begin, len(shape_input)):
+                sum *= shape_input[x]
+            return np.ones([sum]).astype(np.float32)
+
+        for epsilon in [0.0005, -1, 1]:
+            for begin_norm_axis in [1, 0, -1, 2, 3]:
+                dics = [{
+                    "epsilon": epsilon,
+                    "begin_norm_axis": begin_norm_axis
+                }, {}]
+                ops_config = [{
+                    "op_type": "layer_norm",
+                    "op_inputs": {
+                        "X": ["input_data"],
+                        "Scale": ["scale_data"],
+                        "Bias": ["bias_data"]
+                    },
+                    "op_outputs": {
+                        "Y": ["y_data"],
+                        "Mean": ["saved_mean_data"],
+                        "Variance": ["saved_variance_data"]
+                    },
+                    "op_attrs": dics[0]
+                }]
+                ops = self.generate_op_config(ops_config)
+                shape_input = [2, 64, 3, 3]
+                program_config = ProgramConfig(
+                    ops=ops,
+                    weights={
+                        "bias_data":
+                        TensorConfig(data_gen=partial(generate_input2, dics,
+                                                      shape_input)),
+                        "scale_data":
+                        TensorConfig(data_gen=partial(generate_input2, dics,
+                                                      shape_input))
+                    },
+                    inputs={
+                        "input_data":
+                        TensorConfig(data_gen=partial(generate_input1, dics,
+                                                      shape_input))
+                    },
+                    outputs=["y_data"])
+                yield program_config
+
+    def sample_predictor_configs(
+            self, program_config) -> (paddle_infer.Config, List[int], float):
+
+        def generate_dynamic_shape(attrs):
+            self.dynamic_shape.min_input_shape = {"input_data": [1, 64, 3, 3]}
+            self.dynamic_shape.max_input_shape = {"input_data": [4, 64, 3, 3]}
+            self.dynamic_shape.opt_input_shape = {"input_data": [2, 64, 3, 3]}
+
+        def clear_dynamic_shape():
+            self.dynamic_shape.min_input_shape = {}
+            self.dynamic_shape.max_input_shape = {}
+            self.dynamic_shape.opt_input_shape = {}
+
+        def generate_trt_nodes_num(attrs, dynamic_shape):
+            inputs = program_config.inputs
+            return 1, 2
+
+        attrs = [
+            program_config.ops[i].attrs for i in range(len(program_config.ops))
+        ]
+
+        # for static_shape
+        clear_dynamic_shape()
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, False), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, False), 1e-2
+
+        # for dynamic_shape
+        generate_dynamic_shape(attrs)
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, True), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, True), 1e-2
+
+    def test(self):
+        self.run_test()
+
+
 if __name__ == "__main__":
     unittest.main()
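
For the fixed test shape [2, 64, 3, 3], is_program_valid keeps only epsilon = 0.0005 and begin_norm_axis in {1, 2}. In the dynamic-shape path the statistic counts then work out as follows (arithmetic added here for illustration; the test itself only checks outputs and TRT node counts):

begin_norm_axis = 1: build-time count 1; enqueue folds in the batch, 1 * 2 = 2 statistics (outer dim of the flattened [2, 576] matrix)
begin_norm_axis = 2: build-time count 64; enqueue folds in the batch, 64 * 2 = 128 statistics (outer dim of the flattened [128, 9] matrix)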