未验证 提交 da6e5143 编写于 作者: B baoachun 提交者: GitHub

Fix wrong TensorRT dims when the input is 2-dimensional (#36614) (#36732)

* Fix the wrong TensorRT dims produced when the input is 2-dimensional

* Update the leaky_relu and instance_norm converter unit tests

* Add an input-dimension check for instance_norm (only 4-dimensional input is supported in TensorRT)
上级 30ce925c
...@@ -116,6 +116,17 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<T>& shape, std::string input, ...@@ -116,6 +116,17 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<T>& shape, std::string input,
input, ShapeStr(shape))); input, ShapeStr(shape)));
} }
return nvinfer1::Dims2(shape[1], shape[2]); return nvinfer1::Dims2(shape[1], shape[2]);
} else if (shape.size() == 2UL) {
if (shape[1] == -1) {
PADDLE_THROW(platform::errors::InvalidArgument(
"The input [%s] shape of trt subgraph is %s, please enable "
"trt dynamic_shape mode by SetTRTDynamicShapeInfo.",
input, ShapeStr(shape)));
}
nvinfer1::Dims dims;
dims.nbDims = 1;
dims.d[0] = shape[1];
return dims;
} }
return nvinfer1::Dims3(shape[1], 1, 1); return nvinfer1::Dims3(shape[1], 1, 1);
} else { } else {
......
...@@ -1036,6 +1036,22 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8, ...@@ -1036,6 +1036,22 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
<< desc.Output("Y").size(); << desc.Output("Y").size();
return false; return false;
} }
auto* block = desc.Block();
if (block == nullptr) {
VLOG(3) << "The block desc is nullptr, we can't continue to analyze. "
"Developers need to check whether block_desc is passed in "
"the pass.";
return false;
}
auto x_var_name = desc.Input("X")[0];
auto* x_var_desc = block->FindVar(x_var_name);
const auto x_shape = x_var_desc->GetShape();
if (x_shape.size() != 4) {
VLOG(3) << "The instance_norm op only support 4-dimensional input in "
"tensorrt.";
return false;
}
} }
if (op_type == "leaky_relu") { if (op_type == "leaky_relu") {
......
...@@ -65,11 +65,6 @@ int InstanceNormPlugin::enqueue(int batch_size, const void *const *inputs, ...@@ -65,11 +65,6 @@ int InstanceNormPlugin::enqueue(int batch_size, const void *const *inputs,
#endif #endif
cudaStream_t stream) TRT_NOEXCEPT { cudaStream_t stream) TRT_NOEXCEPT {
const auto &input_dims = this->getInputDims(0); const auto &input_dims = this->getInputDims(0);
PADDLE_ENFORCE_EQ(input_dims.nbDims, 3,
platform::errors::InvalidArgument(
"Input Dims should be 3 (except the batch), got %d",
input_dims.nbDims));
int n = batch_size; int n = batch_size;
int c = input_dims.d[0]; int c = input_dims.d[0];
int h = input_dims.d[1]; int h = input_dims.d[1];
......
...@@ -555,10 +555,6 @@ if(WITH_GPU AND TENSORRT_FOUND) ...@@ -555,10 +555,6 @@ if(WITH_GPU AND TENSORRT_FOUND)
if (NOT EXISTS ${TEST_SPLIT_CONVERTER_MODEL}/split_converter.tgz) if (NOT EXISTS ${TEST_SPLIT_CONVERTER_MODEL}/split_converter.tgz)
inference_download_and_uncompress_without_verify(${TEST_SPLIT_CONVERTER_MODEL} ${INFERENCE_URL}/tensorrt_test "split_converter.tgz") inference_download_and_uncompress_without_verify(${TEST_SPLIT_CONVERTER_MODEL} ${INFERENCE_URL}/tensorrt_test "split_converter.tgz")
endif() endif()
set(TEST_INSTANCE_NORM_MODEL "${TRT_MODEL_INSTALL_DIR}/trt_instance_norm_test")
if (NOT EXISTS ${TEST_INSTANCE_NORM_MODEL}/instance_norm.tgz)
inference_download_and_uncompress_without_verify(${TEST_INSTANCE_NORM_MODEL} ${INFERENCE_URL}/tensorrt_test "instance_norm.tgz")
endif()
inference_analysis_test(trt_mobilenet_test SRCS trt_mobilenet_test.cc inference_analysis_test(trt_mobilenet_test SRCS trt_mobilenet_test.cc
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
ARGS --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models) ARGS --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models)
...@@ -577,9 +573,6 @@ if(WITH_GPU AND TENSORRT_FOUND) ...@@ -577,9 +573,6 @@ if(WITH_GPU AND TENSORRT_FOUND)
inference_analysis_test(trt_split_converter_test SRCS trt_split_converter_test.cc inference_analysis_test(trt_split_converter_test SRCS trt_split_converter_test.cc
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
ARGS --infer_model=${TEST_SPLIT_CONVERTER_MODEL}/) ARGS --infer_model=${TEST_SPLIT_CONVERTER_MODEL}/)
inference_analysis_test(trt_instance_norm_test SRCS trt_instance_norm_converter_test.cc
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
ARGS --infer_model=${TEST_INSTANCE_NORM_MODEL}/)
inference_analysis_test(test_analyzer_capi_exp_gpu SRCS analyzer_capi_exp_gpu_tester.cc inference_analysis_test(test_analyzer_capi_exp_gpu SRCS analyzer_capi_exp_gpu_tester.cc
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_inference_c EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_inference_c
ARGS --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models) ARGS --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models)
......
...@@ -24,8 +24,6 @@ import unittest ...@@ -24,8 +24,6 @@ import unittest
class TrtConvertInstanceNormTest(TrtLayerAutoScanTest): class TrtConvertInstanceNormTest(TrtLayerAutoScanTest):
def is_program_valid(self, program_config: ProgramConfig) -> bool: def is_program_valid(self, program_config: ProgramConfig) -> bool:
inputs = program_config.inputs
weights = program_config.weights
attrs = [ attrs = [
program_config.ops[i].attrs program_config.ops[i].attrs
for i in range(len(program_config.ops)) for i in range(len(program_config.ops))
...@@ -38,52 +36,71 @@ class TrtConvertInstanceNormTest(TrtLayerAutoScanTest): ...@@ -38,52 +36,71 @@ class TrtConvertInstanceNormTest(TrtLayerAutoScanTest):
def sample_program_configs(self): def sample_program_configs(self):
def generate_input1(attrs: List[Dict[str, Any]], shape_input): def generate_input1(attrs: List[Dict[str, Any]], shape_input):
return np.ones(shape_input).astype(np.float32) return np.random.random(shape_input).astype(np.float32)
def generate_input2(attrs: List[Dict[str, Any]], shape_input): def generate_input2(attrs: List[Dict[str, Any]], shape_input):
return np.ones(len(shape_input) - 1).astype(np.float32) return np.random.random(shape_input[1]).astype(np.float32)
for epsilon in [0.0005, -1, 1]: for batch in [1, 2, 4]:
dics = [{"epsilon": epsilon}] for shape_input in [[batch, 16], [batch, 32, 64],
[batch, 16, 32, 64]]:
ops_config = [{ self.in_dim = len(shape_input)
"op_type": "instance_norm", for epsilon in [0.0005, -1, 1]:
"op_inputs": { dics = [{"epsilon": epsilon}]
"X": ["input_data"], ops_config = [{
"Scale": ["scale_data"], "op_type": "instance_norm",
"Bias": ["bias_data"] "op_inputs": {
}, "X": ["input_data"],
"op_outputs": { "Scale": ["scale_data"],
"Y": ["y_data"], "Bias": ["bias_data"]
"SavedMean": ["saved_mean_data"], },
"SavedVariance": ["saved_variance_data"] "op_outputs": {
}, "Y": ["y_data"],
"op_attrs": dics[0] "SavedMean": ["saved_mean_data"],
}] "SavedVariance": ["saved_variance_data"]
ops = self.generate_op_config(ops_config) },
shape_input = [1, 3, 64, 64] "op_attrs": dics[0]
program_config = ProgramConfig( }]
ops=ops, ops = self.generate_op_config(ops_config)
weights={ program_config = ProgramConfig(
"bias_data": TensorConfig(data_gen=partial( ops=ops,
generate_input2, dics, shape_input)), weights={
"scale_data": TensorConfig(data_gen=partial( "bias_data": TensorConfig(data_gen=partial(
generate_input2, dics, shape_input)) generate_input2, dics, shape_input)),
}, "scale_data": TensorConfig(data_gen=partial(
inputs={ generate_input2, dics, shape_input))
"input_data": TensorConfig(data_gen=partial( },
generate_input1, dics, shape_input)) inputs={
}, "input_data": TensorConfig(data_gen=partial(
outputs=["y_data"]) generate_input1, dics, shape_input))
},
yield program_config outputs=["y_data"])
yield program_config
def sample_predictor_configs( def sample_predictor_configs(
self, program_config) -> (paddle_infer.Config, List[int], float): self, program_config) -> (paddle_infer.Config, List[int], float):
def generate_dynamic_shape(attrs): def generate_dynamic_shape(attrs):
self.dynamic_shape.min_input_shape = {"input_data": [1, 3, 32, 32]} if self.in_dim == 2:
self.dynamic_shape.max_input_shape = {"input_data": [4, 3, 64, 64]} self.dynamic_shape.min_input_shape = {"input_data": [1, 4]}
self.dynamic_shape.opt_input_shape = {"input_data": [1, 3, 64, 64]} self.dynamic_shape.max_input_shape = {"input_data": [4, 64]}
self.dynamic_shape.opt_input_shape = {"input_data": [2, 16]}
elif self.in_dim == 3:
self.dynamic_shape.min_input_shape = {"input_data": [1, 1, 4]}
self.dynamic_shape.max_input_shape = {
"input_data": [4, 32, 256]
}
self.dynamic_shape.opt_input_shape = {"input_data": [2, 3, 32]}
elif self.in_dim == 4:
self.dynamic_shape.min_input_shape = {
"input_data": [1, 1, 4, 4]
}
self.dynamic_shape.max_input_shape = {
"input_data": [4, 32, 128, 256]
}
self.dynamic_shape.opt_input_shape = {
"input_data": [2, 3, 32, 32]
}
def clear_dynamic_shape(): def clear_dynamic_shape():
self.dynamic_shape.min_input_shape = {} self.dynamic_shape.min_input_shape = {}
...@@ -91,8 +108,7 @@ class TrtConvertInstanceNormTest(TrtLayerAutoScanTest): ...@@ -91,8 +108,7 @@ class TrtConvertInstanceNormTest(TrtLayerAutoScanTest):
self.dynamic_shape.opt_input_shape = {} self.dynamic_shape.opt_input_shape = {}
def generate_trt_nodes_num(attrs, dynamic_shape): def generate_trt_nodes_num(attrs, dynamic_shape):
inputs = program_config.inputs if dynamic_shape or self.in_dim != 4:
if dynamic_shape:
return 0, 3 return 0, 3
return 1, 2 return 1, 2
...@@ -108,7 +124,7 @@ class TrtConvertInstanceNormTest(TrtLayerAutoScanTest): ...@@ -108,7 +124,7 @@ class TrtConvertInstanceNormTest(TrtLayerAutoScanTest):
attrs, False), 1e-5 attrs, False), 1e-5
self.trt_param.precision = paddle_infer.PrecisionType.Half self.trt_param.precision = paddle_infer.PrecisionType.Half
yield self.create_inference_config(), generate_trt_nodes_num( yield self.create_inference_config(), generate_trt_nodes_num(
attrs, False), 1e-2 attrs, False), 1e-5
# for dynamic_shape # for dynamic_shape
generate_dynamic_shape(attrs) generate_dynamic_shape(attrs)
...@@ -117,7 +133,7 @@ class TrtConvertInstanceNormTest(TrtLayerAutoScanTest): ...@@ -117,7 +133,7 @@ class TrtConvertInstanceNormTest(TrtLayerAutoScanTest):
True), 1e-5 True), 1e-5
self.trt_param.precision = paddle_infer.PrecisionType.Half self.trt_param.precision = paddle_infer.PrecisionType.Half
yield self.create_inference_config(), generate_trt_nodes_num(attrs, yield self.create_inference_config(), generate_trt_nodes_num(attrs,
True), 1e-2 True), 1e-5
def test(self): def test(self):
self.run_test() self.run_test()
......
...@@ -27,46 +27,59 @@ class TrtConvertLeakyReluTest(TrtLayerAutoScanTest): ...@@ -27,46 +27,59 @@ class TrtConvertLeakyReluTest(TrtLayerAutoScanTest):
return True return True
def sample_program_configs(self): def sample_program_configs(self):
def generate_input1(attrs: List[Dict[str, Any]]): def generate_input1(shape):
return np.ones([1, 3, 64, 64]).astype(np.float32) return np.random.random(shape).astype(np.float32)
for alpha in [0.02, 1.0, 100.0, -1.0, 0.0]: for batch in [1, 2]:
for X_scale in [1.0, 100.0, 0.01, -0.1, 0.0]: for shape in [[batch, 64], [batch, 32, 64], [batch, 8, 32, 32]]:
dics = [{ self.input_dim = len(shape)
"alpha": alpha, for alpha in [0.02, 1.0, 100.0, -1.0, 0.0]:
"use_mkldnn": True, dics = [{"alpha": alpha}]
"enable_int8": True, ops_config = [{
"X_scale": X_scale "op_type": "leaky_relu",
}] "op_inputs": {
"X": ["input_data"],
ops_config = [{ },
"op_type": "leaky_relu", "op_outputs": {
"op_inputs": { "Out": ["y_data"],
"X": ["input_data"], },
}, "op_attrs": dics[0]
"op_outputs": { }]
"Out": ["y_data"], ops = self.generate_op_config(ops_config)
}, program_config = ProgramConfig(
"op_attrs": dics[0] ops=ops,
}] weights={},
ops = self.generate_op_config(ops_config) inputs={
program_config = ProgramConfig( "input_data": TensorConfig(data_gen=partial(
ops=ops, generate_input1, shape))
weights={}, },
inputs={ outputs=["y_data"])
"input_data":
TensorConfig(data_gen=partial(generate_input1, dics)) yield program_config
},
outputs=["y_data"])
yield program_config
def sample_predictor_configs( def sample_predictor_configs(
self, program_config) -> (paddle_infer.Config, List[int], float): self, program_config) -> (paddle_infer.Config, List[int], float):
def generate_dynamic_shape(attrs): def generate_dynamic_shape(attrs):
self.dynamic_shape.min_input_shape = {"input_data": [1, 3, 32, 32]} if self.input_dim == 2:
self.dynamic_shape.max_input_shape = {"input_data": [4, 3, 64, 64]} self.dynamic_shape.min_input_shape = {"input_data": [1, 8]}
self.dynamic_shape.opt_input_shape = {"input_data": [4, 3, 64, 64]} self.dynamic_shape.max_input_shape = {"input_data": [64, 128]}
self.dynamic_shape.opt_input_shape = {"input_data": [2, 16]}
elif self.input_dim == 3:
self.dynamic_shape.min_input_shape = {"input_data": [1, 8, 8]}
self.dynamic_shape.max_input_shape = {
"input_data": [64, 128, 256]
}
self.dynamic_shape.opt_input_shape = {"input_data": [2, 16, 64]}
elif self.input_dim == 4:
self.dynamic_shape.min_input_shape = {
"input_data": [1, 8, 8, 4]
}
self.dynamic_shape.max_input_shape = {
"input_data": [64, 64, 128, 128]
}
self.dynamic_shape.opt_input_shape = {
"input_data": [2, 16, 64, 32]
}
def clear_dynamic_shape(): def clear_dynamic_shape():
self.dynamic_shape.min_input_shape = {} self.dynamic_shape.min_input_shape = {}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册