diff --git a/paddle/fluid/inference/tensorrt/engine.h b/paddle/fluid/inference/tensorrt/engine.h
index e22c2488d3b8b63746ad9fd19eaa724ce2efa8f7..8cb0d36277def86ead541873f321c233af6e2104 100644
--- a/paddle/fluid/inference/tensorrt/engine.h
+++ b/paddle/fluid/inference/tensorrt/engine.h
@@ -116,6 +116,17 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<int64_t>& shape, std::string input,
                           input, ShapeStr(shape)));
       }
       return nvinfer1::Dims2(shape[1], shape[2]);
+    } else if (shape.size() == 2UL) {
+      if (shape[1] == -1) {
+        PADDLE_THROW(platform::errors::InvalidArgument(
+            "The input [%s] shape of trt subgraph is %s, please enable "
+            "trt dynamic_shape mode by SetTRTDynamicShapeInfo.",
+            input, ShapeStr(shape)));
+      }
+      nvinfer1::Dims dims;
+      dims.nbDims = 1;
+      dims.d[0] = shape[1];
+      return dims;
     }
     return nvinfer1::Dims3(shape[1], 1, 1);
   } else {
diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc
index 05eb85581f394bb086fb3a5e8a8c18160e7d0a6e..a5674b11bd322343da9ae8897960610fb9273295 100644
--- a/paddle/fluid/inference/tensorrt/op_teller.cc
+++ b/paddle/fluid/inference/tensorrt/op_teller.cc
@@ -1036,6 +1036,22 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
                 << desc.Output("Y").size();
         return false;
       }
+
+      auto* block = desc.Block();
+      if (block == nullptr) {
+        VLOG(3) << "The block desc is nullptr, we can't continue to analyze. "
+                   "Developers need to check whether block_desc is passed in "
+                   "the pass.";
+        return false;
+      }
+      auto x_var_name = desc.Input("X")[0];
+      auto* x_var_desc = block->FindVar(x_var_name);
+      const auto x_shape = x_var_desc->GetShape();
+      if (x_shape.size() != 4) {
+        VLOG(3) << "The instance_norm op only supports 4-dimensional input in "
+                   "tensorrt.";
+        return false;
+      }
     }
 
     if (op_type == "leaky_relu") {
diff --git a/paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.cu
index b7c4fb7c99acfddbb095905be162ca9ff72ab803..a9a50543e7bb70d7abc04b405dd581ca0c9f71fe 100644
--- a/paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.cu
+++ b/paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.cu
@@ -65,11 +65,6 @@ int InstanceNormPlugin::enqueue(int batch_size, const void *const *inputs,
 #endif
                                 cudaStream_t stream) TRT_NOEXCEPT {
   const auto &input_dims = this->getInputDims(0);
-
-  PADDLE_ENFORCE_EQ(input_dims.nbDims, 3,
-                    platform::errors::InvalidArgument(
-                        "Input Dims should be 3 (except the batch), got %d",
-                        input_dims.nbDims));
   int n = batch_size;
   int c = input_dims.d[0];
   int h = input_dims.d[1];
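The engine.h hunk above maps a static-shape [N, C] input to a one-dimensional nvinfer1::Dims (TensorRT's static-shape mode keeps the batch axis implicit) and throws when the channel axis is -1, directing users to SetTRTDynamicShapeInfo. For orientation, a minimal sketch of the equivalent setup through the Python inference API; the model files and the tensor name "input_data" are placeholders, not part of this patch:

import paddle.inference as paddle_infer

# Build a GPU + TensorRT config; the sizes here are illustrative.
config = paddle_infer.Config("model.pdmodel", "model.pdiparams")
config.enable_use_gpu(100, 0)
config.enable_tensorrt_engine(
    workspace_size=1 << 30,
    max_batch_size=4,
    min_subgraph_size=3,
    precision_mode=paddle_infer.PrecisionType.Float32,
    use_static=False,
    use_calib_mode=False)
# Dynamic-shape profile for a 2-D [N, C] input; without such a profile,
# a -1 axis now triggers the PADDLE_THROW added above.
config.set_trt_dynamic_shape_info(
    {"input_data": [1, 4]},   # min
    {"input_data": [4, 64]},  # max
    {"input_data": [2, 16]})  # opt
predictor = paddle_infer.create_predictor(config)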
diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt
index 11187a1c79fca32f9257a821d39d71dccf09ec3c..6fd3944a6c52809c924a149933c98bbdb31ebc2f 100644
--- a/paddle/fluid/inference/tests/api/CMakeLists.txt
+++ b/paddle/fluid/inference/tests/api/CMakeLists.txt
@@ -555,10 +555,6 @@ if(WITH_GPU AND TENSORRT_FOUND)
   if (NOT EXISTS ${TEST_SPLIT_CONVERTER_MODEL}/split_converter.tgz)
     inference_download_and_uncompress_without_verify(${TEST_SPLIT_CONVERTER_MODEL} ${INFERENCE_URL}/tensorrt_test "split_converter.tgz")
   endif()
-  set(TEST_INSTANCE_NORM_MODEL "${TRT_MODEL_INSTALL_DIR}/trt_instance_norm_test")
-  if (NOT EXISTS ${TEST_INSTANCE_NORM_MODEL}/instance_norm.tgz)
-    inference_download_and_uncompress_without_verify(${TEST_INSTANCE_NORM_MODEL} ${INFERENCE_URL}/tensorrt_test "instance_norm.tgz")
-  endif()
   inference_analysis_test(trt_mobilenet_test SRCS trt_mobilenet_test.cc
           EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
           ARGS --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models)
@@ -577,9 +573,6 @@ if(WITH_GPU AND TENSORRT_FOUND)
   inference_analysis_test(trt_split_converter_test SRCS trt_split_converter_test.cc
           EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
           ARGS --infer_model=${TEST_SPLIT_CONVERTER_MODEL}/)
-  inference_analysis_test(trt_instance_norm_test SRCS trt_instance_norm_converter_test.cc
-          EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
-          ARGS --infer_model=${TEST_INSTANCE_NORM_MODEL}/)
   inference_analysis_test(test_analyzer_capi_exp_gpu SRCS analyzer_capi_exp_gpu_tester.cc
           EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_inference_c
           ARGS --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models)
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_instance_norm.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_instance_norm.py
index 3f7c2a0fae6f06e929d17d06dc8cb841dd792c17..acd920ccd57ae1f001c2e4028bf30659fab26021 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_instance_norm.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_instance_norm.py
@@ -24,8 +24,6 @@ import unittest
 
 class TrtConvertInstanceNormTest(TrtLayerAutoScanTest):
     def is_program_valid(self, program_config: ProgramConfig) -> bool:
-        inputs = program_config.inputs
-        weights = program_config.weights
         attrs = [
             program_config.ops[i].attrs
             for i in range(len(program_config.ops))
@@ -38,52 +36,71 @@ class TrtConvertInstanceNormTest(TrtLayerAutoScanTest):
     def sample_program_configs(self):
         def generate_input1(attrs: List[Dict[str, Any]], shape_input):
-            return np.ones(shape_input).astype(np.float32)
+            return np.random.random(shape_input).astype(np.float32)
 
         def generate_input2(attrs: List[Dict[str, Any]], shape_input):
-            return np.ones(len(shape_input) - 1).astype(np.float32)
-
-        for epsilon in [0.0005, -1, 1]:
-            dics = [{"epsilon": epsilon}]
-
-            ops_config = [{
-                "op_type": "instance_norm",
-                "op_inputs": {
-                    "X": ["input_data"],
-                    "Scale": ["scale_data"],
-                    "Bias": ["bias_data"]
-                },
-                "op_outputs": {
-                    "Y": ["y_data"],
-                    "SavedMean": ["saved_mean_data"],
-                    "SavedVariance": ["saved_variance_data"]
-                },
-                "op_attrs": dics[0]
-            }]
-            ops = self.generate_op_config(ops_config)
-            shape_input = [1, 3, 64, 64]
-            program_config = ProgramConfig(
-                ops=ops,
-                weights={
-                    "bias_data": TensorConfig(data_gen=partial(
-                        generate_input2, dics, shape_input)),
-                    "scale_data": TensorConfig(data_gen=partial(
-                        generate_input2, dics, shape_input))
-                },
-                inputs={
-                    "input_data": TensorConfig(data_gen=partial(
-                        generate_input1, dics, shape_input))
-                },
-                outputs=["y_data"])
-
-            yield program_config
+            return np.random.random(shape_input[1]).astype(np.float32)
+
+        for batch in [1, 2, 4]:
+            for shape_input in [[batch, 16], [batch, 32, 64],
+                                [batch, 16, 32, 64]]:
+                self.in_dim = len(shape_input)
+                for epsilon in [0.0005, -1, 1]:
+                    dics = [{"epsilon": epsilon}]
+                    ops_config = [{
+                        "op_type": "instance_norm",
+                        "op_inputs": {
+                            "X": ["input_data"],
+                            "Scale": ["scale_data"],
+                            "Bias": ["bias_data"]
+                        },
+                        "op_outputs": {
+                            "Y": ["y_data"],
+                            "SavedMean": ["saved_mean_data"],
+                            "SavedVariance": ["saved_variance_data"]
+                        },
+                        "op_attrs": dics[0]
+                    }]
+                    ops = self.generate_op_config(ops_config)
+                    program_config = ProgramConfig(
+                        ops=ops,
+                        weights={
+                            "bias_data": TensorConfig(data_gen=partial(
+                                generate_input2, dics, shape_input)),
+                            "scale_data": TensorConfig(data_gen=partial(
+                                generate_input2, dics, shape_input))
+                        },
+                        inputs={
+                            "input_data": TensorConfig(data_gen=partial(
+                                generate_input1, dics, shape_input))
+                        },
+                        outputs=["y_data"])
+
+                    yield program_config
 
     def sample_predictor_configs(
             self, program_config) -> (paddle_infer.Config, List[int], float):
         def generate_dynamic_shape(attrs):
-            self.dynamic_shape.min_input_shape = {"input_data": [1, 3, 32, 32]}
-            self.dynamic_shape.max_input_shape = {"input_data": [4, 3, 64, 64]}
-            self.dynamic_shape.opt_input_shape = {"input_data": [1, 3, 64, 64]}
+            if self.in_dim == 2:
+                self.dynamic_shape.min_input_shape = {"input_data": [1, 4]}
+                self.dynamic_shape.max_input_shape = {"input_data": [4, 64]}
+                self.dynamic_shape.opt_input_shape = {"input_data": [2, 16]}
+            elif self.in_dim == 3:
+                self.dynamic_shape.min_input_shape = {"input_data": [1, 1, 4]}
+                self.dynamic_shape.max_input_shape = {
+                    "input_data": [4, 32, 256]
+                }
+                self.dynamic_shape.opt_input_shape = {"input_data": [2, 3, 32]}
+            elif self.in_dim == 4:
+                self.dynamic_shape.min_input_shape = {
+                    "input_data": [1, 1, 4, 4]
+                }
+                self.dynamic_shape.max_input_shape = {
+                    "input_data": [4, 32, 128, 256]
+                }
+                self.dynamic_shape.opt_input_shape = {
+                    "input_data": [2, 3, 32, 32]
+                }
 
         def clear_dynamic_shape():
             self.dynamic_shape.min_input_shape = {}
@@ -91,8 +108,7 @@ class TrtConvertInstanceNormTest(TrtLayerAutoScanTest):
             self.dynamic_shape.opt_input_shape = {}
 
         def generate_trt_nodes_num(attrs, dynamic_shape):
-            inputs = program_config.inputs
-            if dynamic_shape:
+            if dynamic_shape or self.in_dim != 4:
                 return 0, 3
             return 1, 2
 
@@ -108,7 +124,7 @@ class TrtConvertInstanceNormTest(TrtLayerAutoScanTest):
                 attrs, False), 1e-5
         self.trt_param.precision = paddle_infer.PrecisionType.Half
         yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), 1e-2
+            attrs, False), 1e-5
 
         # for dynamic_shape
         generate_dynamic_shape(attrs)
@@ -117,7 +133,7 @@ class TrtConvertInstanceNormTest(TrtLayerAutoScanTest):
             True), 1e-5
         self.trt_param.precision = paddle_infer.PrecisionType.Half
         yield self.create_inference_config(), generate_trt_nodes_num(attrs,
-                                                                     True), 1e-2
+                                                                     True), 1e-5
 
     def test(self):
         self.run_test()
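The rewritten scan above exercises instance_norm on random 2-D, 3-D, and 4-D inputs instead of a single all-ones 4-D tensor. As a reference for what the op computes, a self-contained numpy sketch; the helper name and shapes are illustrative, not part of the patch:

import numpy as np

def instance_norm_ref(x, scale, bias, epsilon=1e-5):
    # Normalize each (sample, channel) slice over its spatial axes, then
    # apply the per-channel scale and bias, as instance_norm does.
    axes = tuple(range(2, x.ndim))
    mean = x.mean(axis=axes, keepdims=True)
    var = x.var(axis=axes, keepdims=True)
    y = (x - mean) / np.sqrt(var + epsilon)
    shape = (1, -1) + (1,) * (x.ndim - 2)
    return y * scale.reshape(shape) + bias.reshape(shape)

x = np.random.random([2, 16, 32, 64]).astype(np.float32)
y = instance_norm_ref(x, np.ones(16, np.float32), np.zeros(16, np.float32))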
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_leaky_relu.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_leaky_relu.py
index 2a8206e58e00e397beaaf4f5c7d5b562380670a1..c647849fa7ee4bbf40ad35e527c7005d16008a5a 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_leaky_relu.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_leaky_relu.py
@@ -27,46 +27,59 @@ class TrtConvertLeakyReluTest(TrtLayerAutoScanTest):
         return True
 
     def sample_program_configs(self):
-        def generate_input1(attrs: List[Dict[str, Any]]):
-            return np.ones([1, 3, 64, 64]).astype(np.float32)
-
-        for alpha in [0.02, 1.0, 100.0, -1.0, 0.0]:
-            for X_scale in [1.0, 100.0, 0.01, -0.1, 0.0]:
-                dics = [{
-                    "alpha": alpha,
-                    "use_mkldnn": True,
-                    "enable_int8": True,
-                    "X_scale": X_scale
-                }]
-
-                ops_config = [{
-                    "op_type": "leaky_relu",
-                    "op_inputs": {
-                        "X": ["input_data"],
-                    },
-                    "op_outputs": {
-                        "Out": ["y_data"],
-                    },
-                    "op_attrs": dics[0]
-                }]
-                ops = self.generate_op_config(ops_config)
-                program_config = ProgramConfig(
-                    ops=ops,
-                    weights={},
-                    inputs={
-                        "input_data":
-                        TensorConfig(data_gen=partial(generate_input1, dics))
-                    },
-                    outputs=["y_data"])
-
-                yield program_config
+        def generate_input1(shape):
+            return np.random.random(shape).astype(np.float32)
+
+        for batch in [1, 2]:
+            for shape in [[batch, 64], [batch, 32, 64], [batch, 8, 32, 32]]:
+                self.input_dim = len(shape)
+                for alpha in [0.02, 1.0, 100.0, -1.0, 0.0]:
+                    dics = [{"alpha": alpha}]
+                    ops_config = [{
+                        "op_type": "leaky_relu",
+                        "op_inputs": {
+                            "X": ["input_data"],
+                        },
+                        "op_outputs": {
+                            "Out": ["y_data"],
+                        },
+                        "op_attrs": dics[0]
+                    }]
+                    ops = self.generate_op_config(ops_config)
+                    program_config = ProgramConfig(
+                        ops=ops,
+                        weights={},
+                        inputs={
+                            "input_data": TensorConfig(data_gen=partial(
+                                generate_input1, shape))
+                        },
+                        outputs=["y_data"])
+
+                    yield program_config
 
     def sample_predictor_configs(
             self, program_config) -> (paddle_infer.Config, List[int], float):
         def generate_dynamic_shape(attrs):
-            self.dynamic_shape.min_input_shape = {"input_data": [1, 3, 32, 32]}
-            self.dynamic_shape.max_input_shape = {"input_data": [4, 3, 64, 64]}
-            self.dynamic_shape.opt_input_shape = {"input_data": [4, 3, 64, 64]}
+            if self.input_dim == 2:
+                self.dynamic_shape.min_input_shape = {"input_data": [1, 8]}
+                self.dynamic_shape.max_input_shape = {"input_data": [64, 128]}
+                self.dynamic_shape.opt_input_shape = {"input_data": [2, 16]}
+            elif self.input_dim == 3:
+                self.dynamic_shape.min_input_shape = {"input_data": [1, 8, 8]}
+                self.dynamic_shape.max_input_shape = {
+                    "input_data": [64, 128, 256]
+                }
+                self.dynamic_shape.opt_input_shape = {"input_data": [2, 16, 64]}
+            elif self.input_dim == 4:
+                self.dynamic_shape.min_input_shape = {
+                    "input_data": [1, 8, 8, 4]
+                }
+                self.dynamic_shape.max_input_shape = {
+                    "input_data": [64, 64, 128, 128]
+                }
+                self.dynamic_shape.opt_input_shape = {
+                    "input_data": [2, 16, 64, 32]
+                }
 
         def clear_dynamic_shape():
             self.dynamic_shape.min_input_shape = {}
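For reference, the elementwise function exercised by this test, using the alpha sweep from sample_program_configs above, reduces to a short numpy sketch; the helper name and input shape are illustrative, not part of the patch:

import numpy as np

def leaky_relu_ref(x, alpha):
    # Keep non-negative entries; scale negative ones by alpha. The sweep
    # covers alpha = 1.0 (identity), -1.0 (abs), and 0.0 (plain relu).
    return np.where(x >= 0, x, alpha * x)

x = np.random.random([2, 8, 32, 32]).astype(np.float32) - 0.5
for alpha in [0.02, 1.0, 100.0, -1.0, 0.0]:
    y = leaky_relu_ref(x, alpha)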