Unverified commit 94cc1d6b, authored by gaoziyuan, committed by GitHub

[Hackathon NO.75] Add the expand_as_v2 op to Paddle-TRT (#51028)



---------
Co-authored-by: Zhang Jun <ewalker@live.cn>
Parent 57201d9d
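Note: as context for the review, a minimal sketch of the expand_as_v2 semantics this PR maps onto TensorRT. paddle.expand_as is the Python API that lowers to the expand_as_v2 op; the shapes below are illustrative only.

import paddle

# Axes of size 1 in x are broadcast to the matching axis of y; every other
# axis must already agree with y's shape.
x = paddle.rand([1, 8, 1, 32])
y = paddle.rand([10, 8, 32, 32])
out = paddle.expand_as(x, y)  # out.shape == [10, 8, 32, 32]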
@@ -2685,6 +2685,7 @@ USE_TRT_CONVERTER(tanh_shrink)
 USE_TRT_CONVERTER(logsigmoid)
 USE_TRT_CONVERTER(lookup_table)
 USE_TRT_CONVERTER(expand_v2)
+USE_TRT_CONVERTER(expand_as_v2)
 USE_TRT_CONVERTER(take_along_axis)
 USE_TRT_CONVERTER(skip_groupnorm_act)
 USE_TRT_CONVERTER(preln_groupnorm_act)

-/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -18,12 +18,12 @@ namespace paddle {
 namespace inference {
 namespace tensorrt {
 
-class ExpandV2OpConverter : public OpConverter {
+class ExpandOpConverter : public OpConverter {
  public:
  void operator()(const framework::proto::OpDesc& op,
                  const framework::Scope& scope,
                  bool test_mode) override {
-    VLOG(3) << "convert a expand_v2 op to trt expand layer.";
+    VLOG(3) << "convert a paddle " << op_type_ << " op to trt expand layer.";
     framework::OpDesc op_desc(op, nullptr);
     auto* input = engine_->GetITensor(op_desc.Input("X")[0]);
     auto inputs = op_desc.Inputs();
@@ -33,25 +33,40 @@ class ExpandV2OpConverter : public OpConverter {
     nvinfer1::ITensor* shape_tensor = nullptr;
     int32_t shape_rank = 0;
-    if (inputs.find("Shape") != inputs.end() &&
-        op_desc.Input("Shape").size() >= 1) {
-      shape_tensor = engine_->GetITensor(op_desc.Input("Shape")[0]);
-      shape_rank = shape_tensor->getDimensions().d[0];
-    } else if (inputs.find("expand_shapes_tensor") != inputs.end() &&
-               op_desc.Input("expand_shapes_tensor").size() >= 1) {
-      int shape_size = op_desc.Input("expand_shapes_tensor").size();
-      std::vector<nvinfer1::ITensor*> shape_tensors;
-      for (int i = 0; i < shape_size; ++i) {
-        shape_tensors.push_back(
-            engine_->GetITensor(op_desc.Input("expand_shapes_tensor")[i]));
-      }
-      shape_tensor = Concat(shape_tensors);
-      shape_rank = shape_size;
-    } else {
-      std::vector<int32_t> shape =
-          PADDLE_GET_CONST(std::vector<int32_t>, op_desc.GetAttr("shape"));
-      shape_tensor = Add1DConstantLayer(shape, output_name + "_shape_tensor_");
-      shape_rank = shape.size();
-    }
+    if (op_type_ == "expand_v2") {
+      if (inputs.find("Shape") != inputs.end() &&
+          op_desc.Input("Shape").size() >= 1) {
+        shape_tensor = engine_->GetITensor(op_desc.Input("Shape")[0]);
+        shape_rank = shape_tensor->getDimensions().nbDims;
+      } else if (inputs.find("expand_shapes_tensor") != inputs.end() &&
+                 op_desc.Input("expand_shapes_tensor").size() >= 1) {
+        int shape_size = op_desc.Input("expand_shapes_tensor").size();
+        std::vector<nvinfer1::ITensor*> shape_tensors;
+        for (int i = 0; i < shape_size; ++i) {
+          shape_tensors.push_back(
+              engine_->GetITensor(op_desc.Input("expand_shapes_tensor")[i]));
+        }
+        shape_tensor = Concat(shape_tensors);
+        shape_rank = shape_size;
+      } else {
+        std::vector<int32_t> shape =
+            PADDLE_GET_CONST(std::vector<int32_t>, op_desc.GetAttr("shape"));
+        shape_tensor =
+            Add1DConstantLayer(shape, output_name + "_shape_tensor_");
+        shape_rank = shape.size();
+      }
+    } else if (op_type_ == "expand_as_v2") {
+      if (inputs.find("Y") != inputs.end()) {
+        shape_tensor = engine_->GetITensor(op_desc.Input("Y")[0]);
+        shape_rank = shape_tensor->getDimensions().nbDims;
+      } else {
+        std::vector<int32_t> shape = PADDLE_GET_CONST(
+            std::vector<int32_t>, op_desc.GetAttr("target_shape"));
+        shape_tensor =
+            Add1DConstantLayer(shape, output_name + "_target_shape_tensor_");
+        shape_rank = shape.size();
+      }
+    }
 
     nvinfer1::ITensor* input_shape_tensor;
@@ -68,8 +83,7 @@ class ExpandV2OpConverter : public OpConverter {
       input_shape_tensor = Shape(input);
     }
 
-    auto* shuffle = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input);
-    shuffle->setInput(1, *input_shape_tensor);
+    auto* newInputTensor = Reshape(input, input_shape_tensor);
 
     std::vector<int32_t> start_vec(shape_rank, 0);
     nvinfer1::Dims start;
@@ -91,13 +105,26 @@ class ExpandV2OpConverter : public OpConverter {
     auto strides_tensor = Min(one_tensor, input_sub_tensor);
 
     auto layer = TRT_ENGINE_ADD_LAYER(
-        engine_, Slice, *shuffle->getOutput(0), start, size, stride);
+        engine_, Slice, *newInputTensor, start, size, stride);
     layer->setInput(1, *starts_tensor);
     layer->setInput(2, *sizes_tensor);
     layer->setInput(3, *strides_tensor);
-    RreplenishLayerAndOutput(layer, "expand_v2", {output_name}, test_mode);
+    RreplenishLayerAndOutput(layer, op_type_, {output_name}, test_mode);
   }
+
+ protected:
+  std::string op_type_;
 };
+
+class ExpandV2OpConverter : public ExpandOpConverter {
+ public:
+  ExpandV2OpConverter() { op_type_ = "expand_v2"; }
+};
+
+class ExpandAsV2OpConverter : public ExpandOpConverter {
+ public:
+  ExpandAsV2OpConverter() { op_type_ = "expand_as_v2"; }
+};
+
 }  // namespace tensorrt
@@ -105,3 +132,4 @@ class ExpandV2OpConverter : public OpConverter {
 }  // namespace paddle
 
 REGISTER_TRT_OP_CONVERTER(expand_v2, ExpandV2OpConverter);
+REGISTER_TRT_OP_CONVERTER(expand_as_v2, ExpandAsV2OpConverter);
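Note: the converter above implements the expand as a Reshape that right-aligns the input rank to the target rank, followed by a Slice whose per-axis stride is min(1, input_dim - 1), i.e. stride 0 exactly on the size-1 (broadcast) axes. A NumPy sketch of the same trick, with a hypothetical helper name:

import numpy as np

def expand_via_strides(x, target_shape):
    # Right-align the input rank to the target rank, as the converter's
    # Reshape step does.
    pad = len(target_shape) - x.ndim
    x = x.reshape((1,) * pad + x.shape)
    # min(1, dim - 1) in the converter picks stride 0 for size-1 axes and
    # stride 1 otherwise; the NumPy analogue zeroes the byte stride.
    strides = tuple(s if d > 1 else 0 for d, s in zip(x.shape, x.strides))
    return np.lib.stride_tricks.as_strided(
        x, shape=target_shape, strides=strides
    )

print(expand_via_strides(np.arange(3.0).reshape(1, 3), (2, 3)))
# [[0. 1. 2.]
#  [0. 1. 2.]]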
@@ -2654,11 +2654,35 @@ struct SimpleOpTypeSetTeller : public Teller {
       }
     }
 
-    if (op_type == "expand_v2") {
+    if (op_type == "expand_as_v2" || op_type == "expand_v2") {
       if (!with_dynamic_shape) {
         VLOG(3) << "the " << op_type
                 << " does not support static shape yet";
         return false;
       }
-      if (!desc.HasAttr("shape")) {
+
+      auto inputs = desc.Inputs();
+      if (op_type == "expand_as_v2") {
+        if (!desc.HasAttr("target_shape") &&
+            inputs.find("Y") == inputs.end()) {
+          VLOG(3)
+              << "expand_as_v2 op must have input(Y) or attr(target_shape).";
+          return false;
+        }
+      } else if (op_type == "expand_v2") {
+        if (!desc.HasAttr("shape") && inputs.find("Shape") == inputs.end() &&
+            inputs.find("expand_shapes_tensor") == inputs.end()) {
+          VLOG(3) << "expand_v2 op must have input(Shape) or "
+                     "input(expand_shapes_tensor) or attr(shape).";
+          return false;
+        }
+      }
+
+      auto* block = desc.Block();
+      if (block == nullptr) {
+        VLOG(3) << "The block desc is nullptr, we can't continue to analyze. "
+                   "Developers need to check whether block_desc is passed in "
+                   "the pass.";
+        return false;
+      }
     }
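Note: with the teller change above, expand_as_v2 is converted only under dynamic shape and only when a target shape is discoverable. The two admissible forms, written in the test harness's op-config style (tensor names illustrative):

# Target shape carried as an attribute (no Y input):
op_with_attr = {
    "op_type": "expand_as_v2",
    "op_inputs": {"X": ["x"]},
    "op_outputs": {"Out": ["out"]},
    "op_attrs": {"target_shape": [10, 8, 32, 32]},
}

# Target shape taken from the Y input's shape:
op_with_input = {
    "op_type": "expand_as_v2",
    "op_inputs": {"X": ["x"], "Y": ["y"]},
    "op_outputs": {"Out": ["out"]},
    "op_attrs": {},
}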
@@ -2921,6 +2945,7 @@ struct SimpleOpTypeSetTeller : public Teller {
       "skip_merge_layernorm",
       "lookup_table_v2",
       "expand_v2",
+      "expand_as_v2",
       "fuse_eleadd_transpose",
       "skip_groupnorm_act",
       "preln_groupnorm_act",
@@ -3080,6 +3105,7 @@ struct SimpleOpTypeSetTeller : public Teller {
       "lookup_table",
       "lookup_table_v2",
       "expand_v2",
+      "expand_as_v2",
       "fuse_eleadd_transpose",
       "skip_groupnorm_act",
       "preln_groupnorm_act",

# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from functools import partial
from typing import Any, Dict, List

import numpy as np
from program_config import ProgramConfig, TensorConfig
from trt_layer_auto_scan_test import TrtLayerAutoScanTest

import paddle.inference as paddle_infer


class TrtConvertExpandASV2Test(TrtLayerAutoScanTest):
    def is_program_valid(self, program_config: ProgramConfig) -> bool:
        attrs = [
            program_config.ops[i].attrs for i in range(len(program_config.ops))
        ]
        if len(attrs[0]['target_shape']) < self.dims:
            return False
        if self.dims == 1:
            if len(attrs[0]['target_shape']) == 4:
                return False
        return True

    def sample_program_configs(self):
        def generate_input1(attrs: List[Dict[str, Any]]):
            if self.dims == 4:
                self.input_shape = [1, 8, 1, 32]
                return np.random.random([1, 8, 1, 32]).astype(np.float32)
            elif self.dims == 3:
                self.input_shape = [1, 32, 32]
                return np.random.random([1, 32, 32]).astype(np.float32)
            elif self.dims == 2:
                self.input_shape = [1, 32]
                return np.random.random([1, 32]).astype(np.float32)
            elif self.dims == 1:
                self.input_shape = [32]
                return np.random.random([32]).astype(np.float32)

        for dims in [1, 2, 3, 4]:
            for shape in [
                [10, 8, 32, 32],
                [2, 8, 32, 32],
                [8, 32, 32],
                [2, 32],
                [32],
            ]:
                dics = [
                    {
                        "target_shape": shape,
                    },
                ]
                self.dims = dims
                ops_config = [
                    {
                        "op_type": "expand_as_v2",
                        "op_inputs": {"X": ["expand_v2_input"]},
                        "op_outputs": {"Out": ["expand_v2_out"]},
                        "op_attrs": dics[0],
                    }
                ]
                ops = self.generate_op_config(ops_config)

                program_config = ProgramConfig(
                    ops=ops,
                    weights={},
                    inputs={
                        "expand_v2_input": TensorConfig(
                            data_gen=partial(generate_input1, dics)
                        )
                    },
                    outputs=["expand_v2_out"],
                )

                yield program_config

    def sample_predictor_configs(
        self, program_config
    ) -> (paddle_infer.Config, List[int], float):
        def generate_dynamic_shape(attrs):
            if self.dims == 4:
                self.dynamic_shape.min_input_shape = {
                    "expand_v2_input": [1, 8, 1, 32]
                }
                self.dynamic_shape.max_input_shape = {
                    "expand_v2_input": [10, 8, 1, 32]
                }
                self.dynamic_shape.opt_input_shape = {
                    "expand_v2_input": [1, 8, 1, 32]
                }
            elif self.dims == 3:
                self.dynamic_shape.min_input_shape = {
                    "expand_v2_input": [1, 32, 32]
                }
                self.dynamic_shape.max_input_shape = {
                    "expand_v2_input": [8, 32, 32]
                }
                self.dynamic_shape.opt_input_shape = {
                    "expand_v2_input": [1, 32, 32]
                }
            elif self.dims == 2:
                self.dynamic_shape.min_input_shape = {
                    "expand_v2_input": [1, 32]
                }
                self.dynamic_shape.max_input_shape = {
                    "expand_v2_input": [4, 32]
                }
                self.dynamic_shape.opt_input_shape = {
                    "expand_v2_input": [1, 32]
                }
            elif self.dims == 1:
                self.dynamic_shape.min_input_shape = {"expand_v2_input": [32]}
                self.dynamic_shape.max_input_shape = {"expand_v2_input": [64]}
                self.dynamic_shape.opt_input_shape = {"expand_v2_input": [32]}

        def clear_dynamic_shape():
            self.dynamic_shape.min_input_shape = {}
            self.dynamic_shape.max_input_shape = {}
            self.dynamic_shape.opt_input_shape = {}

        def generate_trt_nodes_num(attrs, dynamic_shape):
            if dynamic_shape:
                return 1, 2
            else:
                return 0, 3

        attrs = [
            program_config.ops[i].attrs for i in range(len(program_config.ops))
        ]

        clear_dynamic_shape()

        # for dynamic_shape
        generate_dynamic_shape(attrs)
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), generate_trt_nodes_num(
            attrs, True
        ), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
            attrs, True
        ), 1e-3

    def add_skip_trt_case(self):
        pass

    def test(self):
        self.add_skip_trt_case()
        self.run_test()


class TrtConvertExpandV2Test2(TrtLayerAutoScanTest):
    def is_program_valid(self, program_config: ProgramConfig) -> bool:
        attrs = [
            program_config.ops[i].attrs for i in range(len(program_config.ops))
        ]
        return True

    def sample_program_configs(self):
        def generate_input1(attrs: List[Dict[str, Any]]):
            if self.dims == 1:
                self.input_shape = [1]
                return np.random.random([1]).astype(np.float32)

        for dims in [1]:
            for shape in [[10]]:
                dics = [
                    {
                        "target_shape": shape,
                    },
                ]
                self.dims = dims
                dics_input = [
                    {"X": ["expand_v2_input"], "Y": ["shapeT1_data"]},
                ]
                ops_config = [
                    {
                        "op_type": "fill_constant",
                        "op_inputs": {},
                        "op_outputs": {"Out": ["shapeT1_data"]},
                        "op_attrs": {
                            "dtype": 2,
                            "str_value": "10",
                            "shape": [1],
                        },
                    },
                    {
                        "op_type": "expand_as_v2",
                        "op_inputs": dics_input[0],
                        "op_outputs": {"Out": ["expand_v2_out"]},
                        "op_attrs": dics[0],
                    },
                ]
                ops = self.generate_op_config(ops_config)

                program_config = ProgramConfig(
                    ops=ops,
                    weights={},
                    inputs={
                        "expand_v2_input": TensorConfig(
                            data_gen=partial(generate_input1, dics)
                        )
                    },
                    outputs=["expand_v2_out"],
                )

                yield program_config

    def sample_predictor_configs(
        self, program_config
    ) -> (paddle_infer.Config, List[int], float):
        def generate_dynamic_shape():
            if self.dims == 1:
                self.dynamic_shape.min_input_shape = {"expand_v2_input": [1]}
                self.dynamic_shape.max_input_shape = {"expand_v2_input": [1]}
                self.dynamic_shape.opt_input_shape = {"expand_v2_input": [1]}

        def clear_dynamic_shape():
            self.dynamic_shape.min_input_shape = {}
            self.dynamic_shape.max_input_shape = {}
            self.dynamic_shape.opt_input_shape = {}

        clear_dynamic_shape()

        # for dynamic_shape
        generate_dynamic_shape()
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        # fill_constant will be folded by the constant-folding pass!
        yield self.create_inference_config(), (1, 2), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), (1, 2), 1e-3

    def add_skip_trt_case(self):
        pass

    def test(self):
        self.add_skip_trt_case()
        self.run_test()


if __name__ == "__main__":
    unittest.main()