未验证 提交 74206917 编写于 作者: I iamsonderr 提交者: GitHub

[Paddle-TRT] Support conv2d op enter into trt when filter is not a persistable tensor (#55246)

* support_conv2d

* remove comment

* check code style

* add former Test

* check code style

* add unittest

* fix log

* change unittest

---------
Co-authored-by: zhoutianzi666 <17801055074@163.com>
上级 7b19efe4
...@@ -16,6 +16,16 @@ limitations under the License. */ ...@@ -16,6 +16,16 @@ limitations under the License. */
#include "paddle/fluid/inference/tensorrt/engine.h" #include "paddle/fluid/inference/tensorrt/engine.h"
#include "paddle/phi/common/data_type.h" #include "paddle/phi/common/data_type.h"
namespace paddle {
namespace framework {
class Scope;
namespace proto {
class OpDesc;
} // namespace proto
} // namespace framework
} // namespace paddle
namespace paddle { namespace paddle {
namespace inference { namespace inference {
namespace tensorrt { namespace tensorrt {
...@@ -35,11 +45,37 @@ void ConvertConv2d(TensorRTEngine* engine, ...@@ -35,11 +45,37 @@ void ConvertConv2d(TensorRTEngine* engine,
auto* X = engine->GetITensor(op_desc.Input("Input").front()); auto* X = engine->GetITensor(op_desc.Input("Input").front());
std::string filter_var_name = op_desc.Input("Filter").front(); std::string filter_var_name = op_desc.Input("Filter").front();
auto* Y_v = scope.FindVar(filter_var_name); auto* Y_v = scope.FindVar(filter_var_name);
PADDLE_ENFORCE_NOT_NULL( phi::DenseTensor* Y_t = nullptr;
Y_v, nvinfer1::ITensor* filter = nullptr;
platform::errors::NotFound("Can not find %s presistale var in scope.", int n_output;
filter_var_name)); int n_input;
auto* Y_t = Y_v->GetMutable<phi::DenseTensor>(); int filter_h;
int filter_w;
if (Y_v) {
Y_t = Y_v->GetMutable<phi::DenseTensor>();
PADDLE_ENFORCE_EQ(
Y_t->dims().size(),
4UL,
platform::errors::InvalidArgument(
"The conv2d filter's dims size should be 4, but got %d",
Y_t->dims().size()));
n_output = Y_t->dims()[0];
n_input = Y_t->dims()[1];
filter_h = Y_t->dims()[2];
filter_w = Y_t->dims()[3];
} else {
filter = engine->GetITensor(op_desc.Input("Filter").front());
PADDLE_ENFORCE_EQ(
filter->getDimensions().nbDims,
4UL,
platform::errors::InvalidArgument(
"The conv2d filter's dims size should be 4, but got %d",
filter->getDimensions().nbDims));
n_output = filter->getDimensions().d[0];
n_input = filter->getDimensions().d[1];
filter_h = filter->getDimensions().d[2];
filter_w = filter->getDimensions().d[3];
}
bool enable_int8 = op_desc.HasAttr("enable_int8"); bool enable_int8 = op_desc.HasAttr("enable_int8");
...@@ -49,17 +85,6 @@ void ConvertConv2d(TensorRTEngine* engine, ...@@ -49,17 +85,6 @@ void ConvertConv2d(TensorRTEngine* engine,
engine->SetTensorDynamicRange(X, in_scale); engine->SetTensorDynamicRange(X, in_scale);
#endif #endif
} }
PADDLE_ENFORCE_EQ(Y_t->dims().size(),
4UL,
platform::errors::InvalidArgument(
"The conv2d filter's dims size should be 4, but got %d",
Y_t->dims().size()));
const int n_output = Y_t->dims()[0];
const int n_input = Y_t->dims()[1];
const int filter_h = Y_t->dims()[2];
const int filter_w = Y_t->dims()[3];
const int groups = PADDLE_GET_CONST(int, op_desc.GetAttr("groups")); const int groups = PADDLE_GET_CONST(int, op_desc.GetAttr("groups"));
const std::vector<int> dilations = const std::vector<int> dilations =
PADDLE_GET_CONST(std::vector<int>, op_desc.GetAttr("dilations")); PADDLE_GET_CONST(std::vector<int>, op_desc.GetAttr("dilations"));
...@@ -99,9 +124,10 @@ void ConvertConv2d(TensorRTEngine* engine, ...@@ -99,9 +124,10 @@ void ConvertConv2d(TensorRTEngine* engine,
nv_post_paddings.d[0] = paddings[1]; nv_post_paddings.d[0] = paddings[1];
nv_post_paddings.d[1] = paddings[3]; nv_post_paddings.d[1] = paddings[3];
} }
TensorRTEngine::Weight weight(nvinfer1::DataType::kFLOAT, nullptr, 0);
auto weight = engine->GetTrtWeight(op_desc.Input("Filter").front(), *Y_t); if (Y_v) {
weight = engine->GetTrtWeight(op_desc.Input("Filter").front(), *Y_t);
}
TensorRTEngine::Weight bias; TensorRTEngine::Weight bias;
bias.SetDataType(weight.get().type); bias.SetDataType(weight.get().type);
bias.SetCount(0); bias.SetCount(0);
...@@ -135,6 +161,9 @@ void ConvertConv2d(TensorRTEngine* engine, ...@@ -135,6 +161,9 @@ void ConvertConv2d(TensorRTEngine* engine,
layer->setStrideNd(nv_strides); layer->setStrideNd(nv_strides);
layer->setPrePadding(nv_pre_paddings); layer->setPrePadding(nv_pre_paddings);
if (!Y_v) layer->setInput(1, *filter);
if (!output_padding.empty()) { if (!output_padding.empty()) {
nv_post_paddings.d[0] -= output_padding[0]; nv_post_paddings.d[0] -= output_padding[0];
nv_post_paddings.d[1] -= output_padding[1]; nv_post_paddings.d[1] -= output_padding[1];
......
...@@ -355,9 +355,13 @@ struct SimpleOpTypeSetTeller : public Teller { ...@@ -355,9 +355,13 @@ struct SimpleOpTypeSetTeller : public Teller {
if (block) { if (block) {
auto* filter_var_desc = block->FindVar(desc.Input("Filter")[0]); auto* filter_var_desc = block->FindVar(desc.Input("Filter")[0]);
if (!filter_var_desc->Persistable()) { if (!filter_var_desc->Persistable()) {
VLOG(3) << "Trt not support filter is a intermediate tensor in " #if IS_TRT_VERSION_GE(8600)
"conv2d op."; #else
LOG(INFO)
<< "Trt below 8.6 not support conv2d's filter is a intermedoate "
"tensor in conv2d op, please upgarde your TenroRT.";
return false; return false;
#endif
} }
} }
} }
......
...@@ -205,5 +205,169 @@ class TrtConvertConv2dTest(TrtLayerAutoScanTest): ...@@ -205,5 +205,169 @@ class TrtConvertConv2dTest(TrtLayerAutoScanTest):
self.run_test(quant=True) self.run_test(quant=True)
class TrtConvertConv2dNotPersistableTest(TrtLayerAutoScanTest):
    """Exercises conv2d conversion into TensorRT when the Filter input is a
    runtime (non-persistable) tensor rather than a persistable weight.

    TensorRT only supports a dynamic convolution kernel (fed via
    IConvolutionLayer::setInput) since version 8.6, so program configs are
    rejected on older TRT builds.
    """

    def is_program_valid(self, program_config: ProgramConfig) -> bool:
        """Return True only for channel-consistent configs on TRT >= 8.6."""
        inputs = program_config.inputs
        attrs = [
            program_config.ops[i].attrs for i in range(len(program_config.ops))
        ]
        # conv2d requires in_channels == filter_in_channels * groups.
        if (
            inputs['input_data'].shape[1]
            != inputs['weight_data'].shape[1] * attrs[0]['groups']
        ):
            return False
        # A non-persistable conv2d filter needs TensorRT >= 8.6.
        # NOTE(review): fixed the patch digit — the original accidentally
        # used ver[0] instead of ver[2] in the last term.
        ver = paddle_infer.get_trt_compile_version()
        if ver[0] * 1000 + ver[1] * 100 + ver[2] * 10 < 8600:
            return False
        return True

    def sample_program_configs(self):
        """Yield conv2d programs whose Filter is a feed (input), not a weight."""
        self.trt_param.workspace_size = 1073741824

        def generate_input1(attrs: List[Dict[str, Any]]):
            # Activation data uniformly drawn from [-0.5, 0.5).
            return (
                np.random.random(attrs[0]['input_shape']).astype(np.float32)
                - 0.5
            )

        def generate_data(attrs: List[Dict[str, Any]]):
            # Runtime filter tensor uniformly drawn from [-0.5, 0.5).
            return (
                np.random.random(attrs[0]['weight_shape']).astype(np.float32)
                - 0.5
            )

        input_shapes = [[1, 32, 128, 128]]
        ocs = [64]
        kernel_sizes = [[3, 3]]
        strides_options = [[2, 2]]
        paddings_options = [[1, 1]]
        groups_options = [1]
        padding_algorithm_options = ['EXPLICIT']
        dilations_options = [[1, 1]]
        data_format_options = ['NCHW']

        configurations = [
            input_shapes,
            ocs,
            kernel_sizes,
            strides_options,
            paddings_options,
            groups_options,
            padding_algorithm_options,
            dilations_options,
            data_format_options,
        ]

        for (
            input_shape,
            oc,
            kernel_size,
            strides,
            paddings,
            groups,
            padding_algorithm,
            dilations,
            data_format,
        ) in itertools.product(*configurations):
            ic = input_shape[1]
            attrs = [
                {
                    "dilations": dilations,
                    "padding_algorithm": padding_algorithm,
                    "groups": groups,
                    "paddings": paddings,
                    "strides": strides,
                    "data_format": data_format,
                    # The two entries below are not conv2d op attributes;
                    # they are carried here for the data generators'
                    # convenience only.
                    "input_shape": input_shape,
                    "weight_shape": [
                        oc,
                        ic // groups,
                        kernel_size[0],
                        kernel_size[1],
                    ],
                },
            ]

            ops_config = [
                {
                    "op_type": "conv2d",
                    "op_inputs": {
                        "Input": ["input_data"],
                        # Filter is fed as a regular input, so it is not
                        # persistable — the case this test targets.
                        "Filter": ["weight_data"],
                    },
                    "op_outputs": {"Output": ["conv_output_data"]},
                    "op_attrs": attrs[0],
                },
            ]

            ops = self.generate_op_config(ops_config)
            program_config = ProgramConfig(
                ops=ops,
                # Deliberately empty: the filter must NOT be a weight here.
                weights={},
                inputs={
                    "input_data": TensorConfig(
                        data_gen=partial(generate_input1, attrs)
                    ),
                    "weight_data": TensorConfig(
                        data_gen=partial(generate_data, attrs)
                    ),
                },
                outputs=["conv_output_data"],
            )

            yield program_config

    def sample_predictor_configs(
        self, program_config
    ) -> (paddle_infer.Config, List[int], float):
        """Yield FP32 and FP16 dynamic-shape configs with expected node counts."""

        def generate_dynamic_shape(attrs):
            # min == max == opt: a single fixed profile for both feeds.
            self.dynamic_shape.min_input_shape = {
                "input_data": attrs[0]["input_shape"],
                "weight_data": attrs[0]["weight_shape"],
            }
            self.dynamic_shape.max_input_shape = {
                "input_data": attrs[0]["input_shape"],
                "weight_data": attrs[0]["weight_shape"],
            }
            self.dynamic_shape.opt_input_shape = {
                "input_data": attrs[0]["input_shape"],
                "weight_data": attrs[0]["weight_shape"],
            }

        def generate_trt_nodes_num(attrs, dynamic_shape):
            # One fused TRT engine op, three graph nodes overall.
            return 1, 3

        attrs = [
            program_config.ops[i].attrs for i in range(len(program_config.ops))
        ]

        # Dynamic shape is required: a runtime filter implies dynamic inputs.
        generate_dynamic_shape(attrs)
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), generate_trt_nodes_num(
            attrs, True
        ), (1e-2, 1e-2)
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
            attrs, True
        ), (1e-2, 1e-2)

    def test(self):
        self.run_test()
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册