diff --git a/paddle/fluid/inference/tensorrt/convert/expand_v2_op.cc b/paddle/fluid/inference/tensorrt/convert/expand_v2_op.cc
index e491e2861bf43ae06a77a29b4588af4a3194b5d3..464b294e34b222ed5b8f3a2936f519b2a5fe2704 100644
--- a/paddle/fluid/inference/tensorrt/convert/expand_v2_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/expand_v2_op.cc
@@ -33,21 +33,41 @@ class ExpandV2OpConverter : public OpConverter {
   void operator()(const framework::proto::OpDesc& op,
                   const framework::Scope& scope,
                   bool test_mode) override {
+    VLOG(3) << "convert a paddle expand_v2 op to trt expand layer.";
     framework::OpDesc op_desc(op, nullptr);
     auto* input = engine_->GetITensor(op_desc.Input("X")[0]);
+    auto inputs = op_desc.Inputs();
    auto input_dims = input->getDimensions();
     auto output_name = op_desc.Output("Out")[0];
     auto rank = input_dims.nbDims;
-    std::vector<int32_t> shape =
-        PADDLE_GET_CONST(std::vector<int32_t>, op_desc.GetAttr("shape"));
-    int32_t nbDims_num = shape.size();
-    auto* shape_tensor =
-        Add1DConstantLayer(shape, output_name + "_shape_tensor_");
+    nvinfer1::ITensor* shape_tensor = nullptr;
+    int32_t shape_rank = 0;
+    if (inputs.find("Shape") != inputs.end() &&
+        op_desc.Input("Shape").size() >= 1) {
+      shape_tensor = engine_->GetITensor(op_desc.Input("Shape")[0]);
+      shape_rank = shape_tensor->getDimensions().d[0];
+    } else if (inputs.find("expand_shapes_tensor") != inputs.end() &&
+               op_desc.Input("expand_shapes_tensor").size() >= 1) {
+      int shape_size = op_desc.Input("expand_shapes_tensor").size();
+      std::vector<nvinfer1::ITensor*> shape_tensors;
+      for (int i = 0; i < shape_size; ++i) {
+        shape_tensors.push_back(
+            engine_->GetITensor(op_desc.Input("expand_shapes_tensor")[i]));
+      }
+      shape_tensor = Concat(shape_tensors);
+      shape_rank = shape_size;
+    } else {
+      std::vector<int32_t> shape =
+          PADDLE_GET_CONST(std::vector<int32_t>, op_desc.GetAttr("shape"));
+      shape_tensor = Add1DConstantLayer(shape, output_name + "_shape_tensor_");
+      shape_rank = shape.size();
+    }
+
     nvinfer1::ITensor* input_shape_tensor;
-    if (rank < nbDims_num) {
+    if (rank < shape_rank) {
       auto* one_rank_tensor =
-          Add1DConstantLayer(std::vector<int32_t>(nbDims_num - rank, 1),
+          Add1DConstantLayer(std::vector<int32_t>(shape_rank - rank, 1),
                              output_name + "_one_rank_tensor_");
       auto in_shape_tensor = Shape(input);
       std::vector<nvinfer1::ITensor*> itensors;
@@ -61,16 +81,16 @@ class ExpandV2OpConverter : public OpConverter {
     auto* shuffle = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input);
     shuffle->setInput(1, *input_shape_tensor);

-    std::vector<int32_t> start_vec(nbDims_num, 0);
+    std::vector<int32_t> start_vec(shape_rank, 0);
     nvinfer1::Dims start;
-    start.nbDims = nbDims_num;
-    for (int32_t i = 0; i < nbDims_num; ++i) {
+    start.nbDims = shape_rank;
+    for (int32_t i = 0; i < shape_rank; ++i) {
       start.d[i] = start_vec[i];
     }
     nvinfer1::Dims size;
-    size.nbDims = nbDims_num;
+    size.nbDims = shape_rank;
     nvinfer1::Dims stride;
-    stride.nbDims = nbDims_num;
+    stride.nbDims = shape_rank;

     auto starts_tensor =
         Add1DConstantLayer(start_vec, output_name + "_start_tensor_");
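For context: the converter above now covers both tensor-shape graph patterns that `expand_v2` can carry. A minimal sketch of how each pattern arises from the Python API (the `shape=Tensor` to `Input("Shape")` and `shape=[Tensor, int]` to `Input("expand_shapes_tensor")` mapping is assumed from Paddle's `expand`, not part of this patch):

```python
# Sketch only: assumes paddle.expand records Input("Shape") for a Tensor
# `shape` and Input("expand_shapes_tensor") for a list containing Tensors.
import numpy as np
import paddle

paddle.enable_static()
x = paddle.static.data(name="x", shape=[1, 32], dtype="float32")

# Whole target shape as one 1-D tensor -> expand_v2 with Input("Shape").
shape = paddle.assign(np.array([4, 32], dtype="int32"))
out_from_shape = paddle.expand(x, shape=shape)

# Per-dimension tensor mixed with ints -> Input("expand_shapes_tensor").
dim0 = paddle.assign(np.array([4], dtype="int32"))
out_from_list = paddle.expand(x, shape=[dim0, 32])
```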
diff --git a/paddle/fluid/inference/tensorrt/convert/nearest_interp_op.cc b/paddle/fluid/inference/tensorrt/convert/nearest_interp_op.cc
index ecff6262298a68e40e81945effd3b2239c24c15d..9b0fce4758f88193c478d27fa621f5b3daca3969 100644
--- a/paddle/fluid/inference/tensorrt/convert/nearest_interp_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/nearest_interp_op.cc
@@ -12,15 +12,6 @@ limitations under the License. */
 #include "paddle/fluid/framework/data_layout.h"
 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"

-namespace paddle {
-namespace framework {
-class Scope;
-namespace proto {
-class OpDesc;
-}  // namespace proto
-}  // namespace framework
-}  // namespace paddle
-
 namespace paddle {
 namespace inference {
 namespace tensorrt {
@@ -34,6 +25,7 @@ class NearestInterpolateOpConverter : public OpConverter {
     framework::OpDesc op_desc(op, nullptr);

+    auto inputs = op_desc.Inputs();
     std::string input_name = op_desc.Input("X").front();
     std::string output_name = op_desc.Output("Out").front();

diff --git a/paddle/fluid/inference/tensorrt/convert/nearest_interp_v2_op.cc b/paddle/fluid/inference/tensorrt/convert/nearest_interp_v2_op.cc
index 346f5edb7c1bb7de63e3f29f5e3a67d129382aa8..165e037748e05e80f017caa0cd35c529e97c789c 100644
--- a/paddle/fluid/inference/tensorrt/convert/nearest_interp_v2_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/nearest_interp_v2_op.cc
@@ -38,6 +38,7 @@ class NearestInterpolateV2OpConverter : public OpConverter {
     std::string output_name = op_desc.Output("Out").front();

     auto input = engine_->GetITensor(input_name);
+    auto inputs = op_desc.Inputs();

     auto data_layout = phi::StringToDataLayout(
         PADDLE_GET_CONST(std::string, op_desc.GetAttr("data_layout")));
@@ -73,8 +74,22 @@ class NearestInterpolateV2OpConverter : public OpConverter {
       scale_w =
           static_cast<float>(out_w) / static_cast<float>(in_dim.d[w_axis]);
     } else {
-      scale_h = scale[0];
-      scale_w = scale[1];
+      if (scale.size() >= 2) {
+        scale_h = scale[0];
+        scale_w = scale[1];
+      }
+    }
+
+    // Priority: Input(SizeTensor) > attr(out_h/out_w) > attr(scale)
+    nvinfer1::ITensor* outsize_tensor = nullptr;
+    if (engine_->with_dynamic_shape() &&
+        inputs.find("SizeTensor") != inputs.end()) {
+      if (op_desc.Input("SizeTensor").size() >= 2) {
+        auto* outsize_h = engine_->GetITensor(op_desc.Input("SizeTensor")[0]);
+        auto* outsize_w = engine_->GetITensor(op_desc.Input("SizeTensor")[1]);
+        outsize_tensor =
+            Concat(std::vector<nvinfer1::ITensor*>{outsize_h, outsize_w});
+      }
     }

     if (engine_->with_dynamic_shape()) {
@@ -94,7 +109,27 @@ class NearestInterpolateV2OpConverter : public OpConverter {
       PADDLE_THROW(platform::errors::InvalidArgument(
           "Data layout must be NCHW or NHWC."));
     }
-    layer->setScales(scales.data(), scales.size());
+
+    if (engine_->with_dynamic_shape()) {
+      if (outsize_tensor != nullptr) {
+        std::vector<nvinfer1::ITensor*> outsize_itensors;
+        auto* input_shape = Shape(input);
+        outsize_itensors.push_back(GetEleTensorOfShape(input_shape, 0));
+
+        if (data_layout == phi::DataLayout::kNCHW) {
+          outsize_itensors.push_back(GetEleTensorOfShape(input_shape, 1));
+          outsize_itensors.push_back(outsize_tensor);
+        } else if (data_layout == phi::DataLayout::kNHWC) {
+          outsize_itensors.push_back(outsize_tensor);
+          outsize_itensors.push_back(GetEleTensorOfShape(input_shape, 3));
+        }
+        layer->setInput(1, *Concat(outsize_itensors));
+      } else {
+        layer->setScales(scales.data(), scales.size());
+      }
+    } else {
+      layer->setScales(scales.data(), scales.size());
+    }

     RreplenishLayerAndOutput(
         layer, "nearest_interp_v2", {output_name}, test_mode);
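The SizeTensor path added above resolves the output height/width from graph tensors at runtime rather than from static attributes. A minimal sketch of a model that produces this input (the `size=[Tensor, Tensor]` to `Input("SizeTensor")` mapping is assumed from `F.interpolate`, not part of this patch):

```python
# Sketch only: assumes F.interpolate records Input("SizeTensor") when the
# target size is given as per-dimension int32 tensors in a static graph.
import numpy as np
import paddle
import paddle.nn.functional as F

paddle.enable_static()
x = paddle.static.data(name="x", shape=[1, 3, 32, 32], dtype="float32")

out_h = paddle.assign(np.array([64], dtype="int32"))
out_w = paddle.assign(np.array([64], dtype="int32"))
# Under dynamic shape, the converter concatenates these with the batch and
# channel dims and feeds the result to the resize layer via setInput(1, ...).
out = F.interpolate(x, size=[out_h, out_w], mode="nearest")
```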
== "arg_max" || op_type == "arg_min") { if (!desc.HasAttr("axis", /*with_attr_var=*/false)) { VLOG(3) << "Skip to convert into TRT while found Attribute('axis') is " "Variable type in arg_max."; @@ -691,21 +691,6 @@ struct SimpleOpTypeSetTeller : public Teller { if (axis == 0 || flatten || (dtype != 2 && dtype != 3)) return false; } - if (op_type == "arg_min") { - if (!desc.HasAttr("axis", /*with_attr_var=*/false)) { - VLOG(3) << "Skip to convert into TRT while found Attribute('axis') is " - "Variable type in arg_min."; - return false; - } - - int axis = desc.HasAttr("axis") - ? PADDLE_GET_CONST(int64_t, desc.GetAttr("axis")) - : -1; - bool flatten = PADDLE_GET_CONST(bool, desc.GetAttr("flatten")); - int dtype = PADDLE_GET_CONST(int, desc.GetAttr("dtype")); - if (axis == 0 || flatten || dtype != 2) return false; - } - if (op_type == "affine_channel") { if (!desc.HasAttr("data_layout")) return false; auto data_layout = phi::StringToDataLayout( @@ -836,6 +821,14 @@ struct SimpleOpTypeSetTeller : public Teller { auto interp_method = PADDLE_GET_CONST(std::string, desc.GetAttr("interp_method")); if (interp_method != "nearest") return false; + + auto resize_inputs = desc.Inputs(); + if (with_dynamic_shape && + resize_inputs.find("SizeTensor") != resize_inputs.end() && + desc.Input("SizeTensor").size() == 2) { + return true; + } + auto scale = PADDLE_GET_CONST(std::vector, desc.GetAttr("scale")); auto out_h = PADDLE_GET_CONST(int, desc.GetAttr("out_h")); auto out_w = PADDLE_GET_CONST(int, desc.GetAttr("out_w")); @@ -2292,7 +2285,8 @@ struct SimpleOpTypeSetTeller : public Teller { } } #endif - if (!((in_dtype == 5 || in_dtype == 4 || in_dtype == 2) && + if (!((in_dtype == 5 || in_dtype == 4 || in_dtype == 3 || + in_dtype == 2) && (out_dtype == 5 || out_dtype == 4 || out_dtype == 2))) { VLOG(3) << "only valid conversions are: " "(kFLOAT | kHALF | kINT32) -> (kFLOAT | kHALF | kINT32)"; @@ -2411,18 +2405,6 @@ struct SimpleOpTypeSetTeller : public Teller { if (!desc.HasAttr("shape")) { return false; } - auto expand_v2_inputs = desc.Inputs(); - if (expand_v2_inputs.find("Shape") != expand_v2_inputs.end()) { - if (desc.Input("Shape").size() >= 1) { - return false; - } - } - if (expand_v2_inputs.find("expand_shapes_tensor") != - expand_v2_inputs.end()) { - if (desc.Input("expand_shapes_tensor").size() >= 1) { - return false; - } - } } if (use_no_calib_int8) { diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_expand_v2.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_expand_v2.py index b78f427144e6007914cb4962fffd7867da06477e..49330b1fffc2baf8b80b4c5588bbec50d452faf0 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_expand_v2.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_expand_v2.py @@ -249,9 +249,9 @@ class TrtConvertExpandV2Test2(TrtLayerAutoScanTest): generate_dynamic_shape() self.trt_param.precision = paddle_infer.PrecisionType.Float32 # fill_constant will be folded by constnt folding pass! 
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_expand_v2.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_expand_v2.py
index b78f427144e6007914cb4962fffd7867da06477e..49330b1fffc2baf8b80b4c5588bbec50d452faf0 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_expand_v2.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_expand_v2.py
@@ -249,9 +249,9 @@ class TrtConvertExpandV2Test2(TrtLayerAutoScanTest):
         generate_dynamic_shape()
         self.trt_param.precision = paddle_infer.PrecisionType.Float32
         # fill_constant will be folded by constant folding pass!
-        yield self.create_inference_config(), (0, 3), 1e-5
+        yield self.create_inference_config(), (1, 2), 1e-5
         self.trt_param.precision = paddle_infer.PrecisionType.Half
-        yield self.create_inference_config(), (0, 3), 1e-3
+        yield self.create_inference_config(), (1, 2), 1e-3

     def add_skip_trt_case(self):
         pass
@@ -393,9 +393,9 @@ class TrtConvertExpandV2Test3(TrtLayerAutoScanTest):
         generate_dynamic_shape()
         self.trt_param.precision = paddle_infer.PrecisionType.Float32
         # fill_constant will be folded by constant folding pass!
-        yield self.create_inference_config(), (0, 3), 1e-5
+        yield self.create_inference_config(), (1, 2), 1e-5
         self.trt_param.precision = paddle_infer.PrecisionType.Half
-        yield self.create_inference_config(), (0, 3), 1e-3
+        yield self.create_inference_config(), (1, 2), 1e-3

     def add_skip_trt_case(self):
         pass
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_nearest_interp_v2.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_nearest_interp_v2.py
index 197d1f3e0766a98506f31417a71bc62e8f8af6b1..cca538714783fdffd2668f90c04ceb1a511006d9 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_nearest_interp_v2.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_nearest_interp_v2.py
@@ -30,10 +30,16 @@ class TrtConvertNearestInterpV2Test(TrtLayerAutoScanTest):
         def generate_input():
             return np.ones([1, 3, 32, 32]).astype(np.float32)

+        def generate_weight():
+            return np.array([64]).astype(np.int32)
+
         ops_config = [
             {
                 "op_type": "nearest_interp_v2",
-                "op_inputs": {"X": ["input_data"]},
+                "op_inputs": {
+                    "X": ["input_data"],
+                    "SizeTensor": ["size_tensor_data0", "size_tensor_data1"],
+                },
                 "op_outputs": {"Out": ["interp_output_data"]},
                 "op_attrs": {
                     "data_layout": "NCHW",
@@ -51,7 +57,10 @@ class TrtConvertNearestInterpV2Test(TrtLayerAutoScanTest):
         ops = self.generate_op_config(ops_config)
         program_config = ProgramConfig(
             ops=ops,
-            weights={},
+            weights={
+                "size_tensor_data0": TensorConfig(data_gen=generate_weight),
+                "size_tensor_data1": TensorConfig(data_gen=generate_weight),
+            },
             inputs={"input_data": TensorConfig(data_gen=generate_input)},
             outputs=["interp_output_data"],
         )
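The expectation tuples changed from `(0, 3)` to `(1, 2)`; read as the expected (trt_engine_num, paddle_op_num) pair (naming assumed from the auto-scan framework), the expand_v2 graphs with tensor shapes are now asserted to actually fuse into one TRT subgraph instead of falling back to Paddle entirely. A quick dynamic-mode sanity check of the 32 to 64 nearest upsample the new SizeTensor test exercises (illustrative only, not part of the patch):

```python
# Illustrative check of the 32 -> 64 nearest upsample covered by the new
# SizeTensor test above (input shape and target size taken from the test).
import paddle
import paddle.nn.functional as F

x = paddle.ones([1, 3, 32, 32], dtype="float32")
y = F.interpolate(x, size=[64, 64], mode="nearest")
assert tuple(y.shape) == (1, 3, 64, 64)
```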