From 7cdce09b5b53b395a758752f59ff79060129b2ba Mon Sep 17 00:00:00 2001
From: ccrrong <101700995+ccrrong@users.noreply.github.com>
Date: Thu, 4 Aug 2022 15:20:28 +0800
Subject: [PATCH] [cherry pick] add cast trt convert (#44837)

* add cast trt convert

* skip cast trt convert when input dtype is bool

* code format

* fix bug

* update unittest

* fix bug
---
 .../fluid/inference/api/analysis_predictor.cc |   1 +
 .../inference/tensorrt/convert/CMakeLists.txt |  95 ++++++++-----
 .../inference/tensorrt/convert/cast_op.cc     |  66 +++++++++
 paddle/fluid/inference/tensorrt/op_teller.cc  |  78 ++++++++---
 .../ir/inference/test_trt_convert_cast.py     | 126 ++++++++++++++++++
 5 files changed, 317 insertions(+), 49 deletions(-)
 create mode 100644 paddle/fluid/inference/tensorrt/convert/cast_op.cc
 create mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_cast.py

diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 4358795d939..af4cc0b83ef 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -1793,6 +1793,7 @@ USE_TRT_CONVERTER(multiclass_nms3);
 USE_TRT_CONVERTER(nearest_interp);
 USE_TRT_CONVERTER(nearest_interp_v2);
 USE_TRT_CONVERTER(bilinear_interp_v2);
+USE_TRT_CONVERTER(cast);
 USE_TRT_CONVERTER(reshape);
 USE_TRT_CONVERTER(reduce_sum);
 USE_TRT_CONVERTER(gather_nd);
diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
index aacecbdadbf..0ea12084b6b 100644
--- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
+++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
@@ -1,33 +1,66 @@
 # Add TRT tests
-nv_library(tensorrt_converter
-  SRCS matmul_op.cc conv2d_op.cc fc_op.cc pool2d_op.cc elementwise_op.cc
-  batch_norm_op.cc activation_op.cc softmax_op.cc concat_op.cc dropout_op.cc group_norm_op.cc
-  pad_op.cc split_op.cc prelu_op.cc leaky_relu_op.cc gelu_op.cc layer_norm_op.cc multihead_matmul_op.cc
-  shuffle_channel_op.cc swish_op.cc instance_norm_op.cc stack_op.cc transpose_op.cc flatten_op.cc flatten_contiguous_range_op.cc
-  emb_eltwise_layernorm.cc skip_layernorm.cc scale_op.cc slice_op.cc hard_sigmoid_op.cc hard_swish_op.cc clip_op.cc
-  gather_op.cc
-  bilinear_interp_v2_op.cc
-  anchor_generator_op.cc
-  yolo_box_op.cc
-  roi_align_op.cc
-  affine_channel_op.cc
-  multiclass_nms_op.cc
-  multiclass_nms3_op.cc
-  nearest_interp_op.cc
-  reshape_op.cc
-  reduce_op.cc
-  gather_nd_op.cc
-  tile_op.cc
-  conv3d_op.cc
-  mish_op.cc
-  nearest_interp_v2_op.cc
-  pool3d_op.cc
-  deformable_conv_op.cc
-  preln_emb_eltwise_layernorm.cc
-  strided_slice_op.cc
-  preln_skip_layernorm.cc
-  roll_op.cc
-  DEPS tensorrt_engine tensorrt_plugin operator scope framework_proto op_registry)
+nv_library(
+  tensorrt_converter
+  SRCS matmul_op.cc
+       conv2d_op.cc
+       fc_op.cc
+       pool2d_op.cc
+       elementwise_op.cc
+       batch_norm_op.cc
+       activation_op.cc
+       softmax_op.cc
+       concat_op.cc
+       dropout_op.cc
+       group_norm_op.cc
+       pad_op.cc
+       split_op.cc
+       prelu_op.cc
+       leaky_relu_op.cc
+       gelu_op.cc
+       layer_norm_op.cc
+       multihead_matmul_op.cc
+       shuffle_channel_op.cc
+       swish_op.cc
+       instance_norm_op.cc
+       stack_op.cc
+       transpose_op.cc
+       flatten_op.cc
+       flatten_contiguous_range_op.cc
+       emb_eltwise_layernorm.cc
+       skip_layernorm.cc
+       scale_op.cc
+       slice_op.cc
+       hard_sigmoid_op.cc
+       hard_swish_op.cc
+       clip_op.cc
+       gather_op.cc
+       bilinear_interp_v2_op.cc
+       cast_op.cc
+       anchor_generator_op.cc
+       yolo_box_op.cc
+       roi_align_op.cc
+       affine_channel_op.cc
+       multiclass_nms_op.cc
+       multiclass_nms3_op.cc
+       nearest_interp_op.cc
+       reshape_op.cc
+       reduce_op.cc
+       gather_nd_op.cc
+       tile_op.cc
+       conv3d_op.cc
+       mish_op.cc
+       nearest_interp_v2_op.cc
+       pool3d_op.cc
+       deformable_conv_op.cc
+       preln_emb_eltwise_layernorm.cc
+       strided_slice_op.cc
+       preln_skip_layernorm.cc
+       roll_op.cc
+  DEPS tensorrt_engine tensorrt_plugin operator scope framework_proto
+       op_registry)
 
-nv_test(test_op_converter SRCS test_op_converter.cc DEPS
-  paddle_framework ${GLOB_OPERATOR_DEPS} tensorrt_engine tensorrt_converter)
+nv_test(
+  test_op_converter
+  SRCS test_op_converter.cc
+  DEPS paddle_framework ${GLOB_OPERATOR_DEPS} tensorrt_engine
+       tensorrt_converter)
diff --git a/paddle/fluid/inference/tensorrt/convert/cast_op.cc b/paddle/fluid/inference/tensorrt/convert/cast_op.cc
new file mode 100644
index 00000000000..0f504c1108b
--- /dev/null
+++ b/paddle/fluid/inference/tensorrt/convert/cast_op.cc
@@ -0,0 +1,66 @@
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
+
+namespace paddle {
+namespace framework {
+class Scope;
+
+namespace proto {
+class OpDesc;
+}  // namespace proto
+}  // namespace framework
+}  // namespace paddle
+
+namespace paddle {
+namespace inference {
+namespace tensorrt {
+
+class CastOpConverter : public OpConverter {
+ public:
+  void operator()(const framework::proto::OpDesc& op,
+                  const framework::Scope& scope,
+                  bool test_mode) override {
+    VLOG(3) << "convert a cast op to tensorrt";
+    framework::OpDesc op_desc(op, nullptr);
+
+    auto* input = engine_->GetITensor(op_desc.Input("X")[0]);
+    auto out_dtype = BOOST_GET_CONST(int, op_desc.GetAttr("out_dtype"));
+
+    auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Identity, *input);
+
+    switch (out_dtype) {
+      case 2:  // INT32 = 2
+        layer->getOutput(0)->setType(nvinfer1::DataType::kINT32);
+        break;
+      case 4:  // FP16 = 4
+        layer->getOutput(0)->setType(nvinfer1::DataType::kHALF);
+        break;
+      case 5:  // FP32 = 5
+        layer->getOutput(0)->setType(nvinfer1::DataType::kFLOAT);
+        break;
+      default:
+        LOG(ERROR) << "Unable to convert a fluid data type(" << out_dtype
+                   << ") to a nvinfer DataType";
+        break;
+    }
+
+    auto output_name = op_desc.Output("Out")[0];
+    RreplenishLayerAndOutput(layer, "cast", {output_name}, test_mode);
+  }
+};
+
+}  // namespace tensorrt
+}  // namespace inference
+}  // namespace paddle
+
+REGISTER_TRT_OP_CONVERTER(cast, CastOpConverter);
diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc
index b4e41ed0d11..22147d6dc63 100644
--- a/paddle/fluid/inference/tensorrt/op_teller.cc
+++ b/paddle/fluid/inference/tensorrt/op_teller.cc
@@ -49,7 +49,8 @@ struct SimpleOpTypeSetTeller : public Teller {
 #endif
   }
 
-  bool operator()(const std::string& op_type, const framework::OpDesc& desc,
+  bool operator()(const std::string& op_type,
+                  const framework::OpDesc& desc,
                   bool use_no_calib_int8) override {
     if (use_no_calib_int8) {
       return int8_teller_set.count(op_type);
@@ -111,6 +112,7 @@ struct SimpleOpTypeSetTeller : public Teller {
       "mish",
       "nearest_interp_v2",
       "bilinear_interp_v2",
+      "cast",
       "pool3d",
       "deformable_conv",
       "relu6",
@@ -175,6 +177,7 @@ struct SimpleOpTypeSetTeller : public Teller {
       "mish",
       "bilinear_interp_v2",
       "nearest_interp_v2",
+      "cast",
       "pool3d",
       "deformable_conv",
       "relu6",
@@ -191,7 +194,8 @@ struct SimpleOpTypeSetTeller : public Teller {
       "multiclass_nms3"};
 };
 
-bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
+bool OpTeller::Tell(const framework::ir::Node* node,
+                    bool use_no_calib_int8,
                     bool with_dynamic_shape) {
   const std::string op_type = node->Op()->Type();
   const framework::OpDesc desc = *node->Op();
@@ -706,8 +710,8 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
     }
 
     if (op_type == "nearest_interp") {
-      std::vector<std::string> attrs{"interp_method", "align_corners", "scale",
-                                     "out_h", "out_w"};
+      std::vector<std::string> attrs{
+          "interp_method", "align_corners", "scale", "out_h", "out_w"};
       for (auto const attr : attrs) {
         if (!desc.HasAttr(attr)) return false;
       }
@@ -747,9 +751,12 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
     }
 
     if (op_type == "nearest_interp_v2") {
-      std::vector<std::string> attrs{"data_layout",   "interp_method",
-                                     "align_corners", "scale",
-                                     "out_h",         "out_w"};
+      std::vector<std::string> attrs{"data_layout",
+                                     "interp_method",
+                                     "align_corners",
+                                     "scale",
+                                     "out_h",
+                                     "out_w"};
       for (auto const attr : attrs) {
         if (!desc.HasAttr(attr)) return false;
       }
@@ -775,9 +782,12 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
     }
 
     if (op_type == "bilinear_interp_v2") {
-      std::vector<std::string> attrs{"data_layout",   "interp_method",
-                                     "align_corners", "scale",
-                                     "out_h",         "out_w"};
+      std::vector<std::string> attrs{"data_layout",
+                                     "interp_method",
+                                     "align_corners",
+                                     "scale",
+                                     "out_h",
+                                     "out_w"};
       for (auto const attr : attrs) {
         if (!desc.HasAttr(attr)) {
           VLOG(3) << "The op_type " << op_type << " doesn't have the attr "
@@ -882,8 +892,8 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
     }
 
     if (op_type == "batch_norm") {
-      const std::vector<std::string> bn_inputs = {"X", "Bias", "Mean", "Scale",
-                                                  "Variance"};
+      const std::vector<std::string> bn_inputs = {
+          "X", "Bias", "Mean", "Scale", "Variance"};
       for (unsigned int i = 0; i < bn_inputs.size(); i++) {
         if (desc.Input(bn_inputs[i]).size() != 1) {
           VLOG(3) << "Invalid " << bn_inputs[i]
@@ -1458,8 +1468,10 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
                    "the roi_align will change the batch size.";
         return false;
       }
-      std::vector<std::string> attrs{"pooled_height", "pooled_width",
-                                     "spatial_scale", "sampling_ratio",
+      std::vector<std::string> attrs{"pooled_height",
+                                     "pooled_width",
+                                     "spatial_scale",
+                                     "sampling_ratio",
                                      "aligned"};
       for (auto const attr : attrs) {
         if (!desc.HasAttr(attr)) return false;
@@ -1641,10 +1653,10 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
       auto x_var_name = desc.Input("X")[0];
       auto* x_var_desc = block->FindVar(x_var_name);
       const auto x_shape = x_var_desc->GetShape();
-      int input_num = std::accumulate(x_shape.begin() + 1, x_shape.end(), 1,
-                                      std::multiplies<int>());
-      int shape_num = std::accumulate(shape.begin() + 1, shape.end(), 1,
-                                      std::multiplies<int>());
+      int input_num = std::accumulate(
+          x_shape.begin() + 1, x_shape.end(), 1, std::multiplies<int>());
+      int shape_num = std::accumulate(
+          shape.begin() + 1, shape.end(), 1, std::multiplies<int>());
       if (input_num == shape_num) {
         return true;
       }
@@ -1751,6 +1763,36 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
     }
 #endif
 
+    if (op_type == "cast") {
+// trt 6015 result in Windows ppyolo_mbv3 TRT fp32 diff
+#if !IS_TRT_VERSION_GE(7000)
+      return false;
+#endif
+      if (!(desc.HasAttr("in_dtype") && desc.HasAttr("out_dtype"))) {
+        VLOG(3) << "the " << op_type
+                << " does not have attr (in_dtype or "
+                   "out_dtype)";
+        return false;
+      }
+      int in_dtype = BOOST_GET_CONST(int, desc.GetAttr("in_dtype"));
+      int out_dtype = BOOST_GET_CONST(int, desc.GetAttr("out_dtype"));
+      if ((in_dtype == 4 || in_dtype == 5) && out_dtype == 4) {
+        VLOG(3) << "unsupport data type conversion";
+        return false;
+      }
+      if (in_dtype == 0) {
+        VLOG(3) << "do not support input data type as bool now";
+        return false;
+      }
+      if (!((in_dtype == 5 || in_dtype == 4 || in_dtype == 2) &&
+            (out_dtype == 5 || out_dtype == 4 || out_dtype == 2))) {
+        VLOG(3)
+            << "only valid conversions are: "
+               "(kFLOAT | kHALF | kINT32) -> (kFLOAT | kHALF | kINT32)";
+        return false;
+      }
+    }
+
     if (op_type == "conv3d" || op_type == "conv3d_transpose") {
       if (desc.HasAttr("padding_algorithm")) {
         std::string padding_algorithm =
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_cast.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_cast.py
new file mode 100644
index 00000000000..c434b6a9678
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_cast.py
@@ -0,0 +1,126 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from trt_layer_auto_scan_test import TrtLayerAutoScanTest, SkipReasons
+from program_config import TensorConfig, ProgramConfig
+import unittest
+import numpy as np
+import paddle.inference as paddle_infer
+from functools import partial
+from typing import Optional, List, Callable, Dict, Any, Set
+
+
+class TrtConvertCastTest(TrtLayerAutoScanTest):
+
+    def is_program_valid(self, program_config: ProgramConfig) -> bool:
+        ver = paddle_infer.get_trt_compile_version()
+        if ver[0] * 1000 + ver[1] * 100 + ver[2] * 10 < 7000:
+            return False
+
+        attrs = [
+            program_config.ops[i].attrs for i in range(len(program_config.ops))
+        ]
+        if attrs[0]['in_dtype'] == 0:
+            return False
+        if attrs[0]['in_dtype'] in [4, 5] and attrs[0]['out_dtype'] == 4:
+            return False
+        if attrs[0]['in_dtype'] not in [
+                2, 4, 5
+        ] or attrs[0]['out_dtype'] not in [2, 4, 5]:
+            return False
+        return True
+
+    def sample_program_configs(self):
+
+        def generate_input(type):
+            if type == 0:
+                return np.ones([1, 3, 64, 64]).astype(np.bool_)
+            elif type == 2:
+                return np.ones([1, 3, 64, 64]).astype(np.int32)
+            elif type == 4:
+                return np.ones([1, 3, 64, 64]).astype(np.float16)
+            else:
+                return np.ones([1, 3, 64, 64]).astype(np.float32)
+
+        for in_dtype in [0, 2, 4, 5, 6]:
+            for out_dtype in [0, 2, 4, 5, 6]:
+                dics = [{"in_dtype": in_dtype, "out_dtype": out_dtype}]
+
+                ops_config = [{
+                    "op_type": "cast",
+                    "op_inputs": {
+                        "X": ["input_data"]
+                    },
+                    "op_outputs": {
+                        "Out": ["cast_output_data"]
+                    },
+                    "op_attrs": dics[0]
+                }]
+                ops = self.generate_op_config(ops_config)
+
+                program_config = ProgramConfig(
+                    ops=ops,
+                    weights={},
+                    inputs={
+                        "input_data":
+                        TensorConfig(data_gen=partial(generate_input, in_dtype))
+                    },
+                    outputs=["cast_output_data"])
+
+                yield program_config
+
+    def sample_predictor_configs(
+            self, program_config) -> (paddle_infer.Config, List[int], float):
+
+        def generate_dynamic_shape(attrs):
+            self.dynamic_shape.min_input_shape = {"input_data": [1, 3, 64, 64]}
+            self.dynamic_shape.max_input_shape = {"input_data": [4, 3, 64, 64]}
+            self.dynamic_shape.opt_input_shape = {"input_data": [1, 3, 64, 64]}
+
+        def clear_dynamic_shape():
+            self.dynamic_shape.min_input_shape = {}
+            self.dynamic_shape.max_input_shape = {}
+            self.dynamic_shape.opt_input_shape = {}
+
+        def generate_trt_nodes_num(attrs, dynamic_shape):
+            return 1, 2
+
+        attrs = [
+            program_config.ops[i].attrs for i in range(len(program_config.ops))
+        ]
+
+        # for static_shape
+        clear_dynamic_shape()
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, False), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, False), 1e-2
+
+        # for dynamic_shape
+        generate_dynamic_shape(attrs)
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, True), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, True), 1e-2
+
+    def test(self):
+        self.run_test()
+
+
+if __name__ == "__main__":
+    unittest.main()
-- 
GitLab