diff --git a/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc b/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc
index a8387e675984c06930e31568ff231774a9542263..19a5fbfe53885b89c07c44b9bfa69278d1dca91e 100644
--- a/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc
@@ -16,6 +16,16 @@ limitations under the License. */
 #include "paddle/fluid/inference/tensorrt/engine.h"
 #include "paddle/phi/common/data_type.h"
 
+namespace paddle {
+namespace framework {
+class Scope;
+
+namespace proto {
+class OpDesc;
+}  // namespace proto
+}  // namespace framework
+}  // namespace paddle
+
 namespace paddle {
 namespace inference {
 namespace tensorrt {
@@ -35,11 +45,39 @@ void ConvertConv2d(TensorRTEngine* engine,
   auto* X = engine->GetITensor(op_desc.Input("Input").front());
   std::string filter_var_name = op_desc.Input("Filter").front();
   auto* Y_v = scope.FindVar(filter_var_name);
-  PADDLE_ENFORCE_NOT_NULL(
-      Y_v,
-      platform::errors::NotFound("Can not find %s presistale var in scope.",
-                                 filter_var_name));
-  auto* Y_t = Y_v->GetMutable<phi::DenseTensor>();
+  phi::DenseTensor* Y_t = nullptr;
+  nvinfer1::ITensor* filter = nullptr;
+  int n_output = 0;
+  int n_input = 0;
+  int filter_h = 0;
+  int filter_w = 0;
+  if (Y_v) {
+    // Filter found in scope: take the kernel shape from the scope tensor.
+    Y_t = Y_v->GetMutable<phi::DenseTensor>();
+    PADDLE_ENFORCE_EQ(
+        Y_t->dims().size(),
+        4UL,
+        platform::errors::InvalidArgument(
+            "The conv2d filter's dims size should be 4, but got %d",
+            Y_t->dims().size()));
+    n_output = Y_t->dims()[0];
+    n_input = Y_t->dims()[1];
+    filter_h = Y_t->dims()[2];
+    filter_w = Y_t->dims()[3];
+  } else {
+    // Filter missing from scope: use the network tensor of the same name.
+    filter = engine->GetITensor(op_desc.Input("Filter").front());
+    PADDLE_ENFORCE_EQ(
+        filter->getDimensions().nbDims,
+        4UL,
+        platform::errors::InvalidArgument(
+            "The conv2d filter's dims size should be 4, but got %d",
+            filter->getDimensions().nbDims));
+    n_output = filter->getDimensions().d[0];
+    n_input = filter->getDimensions().d[1];
+    filter_h = filter->getDimensions().d[2];
+    filter_w = filter->getDimensions().d[3];
+  }
 
   bool enable_int8 = op_desc.HasAttr("enable_int8");
 
@@ -49,17 +87,6 @@ void ConvertConv2d(TensorRTEngine* engine,
     engine->SetTensorDynamicRange(X, in_scale);
 #endif
   }
-
-  PADDLE_ENFORCE_EQ(Y_t->dims().size(),
-                    4UL,
-                    platform::errors::InvalidArgument(
-                        "The conv2d filter's dims size should be 4, but got %d",
-                        Y_t->dims().size()));
-
-  const int n_output = Y_t->dims()[0];
-  const int n_input = Y_t->dims()[1];
-  const int filter_h = Y_t->dims()[2];
-  const int filter_w = Y_t->dims()[3];
   const int groups = PADDLE_GET_CONST(int, op_desc.GetAttr("groups"));
   const std::vector<int> dilations =
       PADDLE_GET_CONST(std::vector<int>, op_desc.GetAttr("dilations"));
@@ -99,9 +126,11 @@ void ConvertConv2d(TensorRTEngine* engine,
     nv_post_paddings.d[0] = paddings[1];
     nv_post_paddings.d[1] = paddings[3];
   }
-
-  auto weight = engine->GetTrtWeight(op_desc.Input("Filter").front(), *Y_t);
-
+  // Stays empty when the filter is not in scope; it is bound via setInput(1).
+  TensorRTEngine::Weight weight(nvinfer1::DataType::kFLOAT, nullptr, 0);
+  if (Y_v) {
+    weight = engine->GetTrtWeight(op_desc.Input("Filter").front(), *Y_t);
+  }
   TensorRTEngine::Weight bias;
   bias.SetDataType(weight.get().type);
   bias.SetCount(0);
@@ -135,6 +164,10 @@ void ConvertConv2d(TensorRTEngine* engine,
   layer->setStrideNd(nv_strides);
 
   layer->setPrePadding(nv_pre_paddings);
+
+  // No constant kernel was loaded: feed the filter tensor as layer input 1.
+  if (!Y_v) layer->setInput(1, *filter);
+
   if (!output_padding.empty()) {
     nv_post_paddings.d[0] -= output_padding[0];
     nv_post_paddings.d[1] -= output_padding[1];
diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc
index c682e2f2e89826462c6a1b5b873783efbc3feab3..cfca8038eebe7cd3fe4c7809438d79ee5456a862 100644
--- a/paddle/fluid/inference/tensorrt/op_teller.cc
+++ b/paddle/fluid/inference/tensorrt/op_teller.cc
@@ -355,9 +355,14 @@ struct SimpleOpTypeSetTeller : public Teller {
       if (block) {
         auto* filter_var_desc = block->FindVar(desc.Input("Filter")[0]);
         if (!filter_var_desc->Persistable()) {
-          VLOG(3) << "Trt not support filter is a intermediate tensor in "
-                     "conv2d op.";
+#if IS_TRT_VERSION_GE(8600)
+          // A non-persistable filter passes this check on TRT >= 8.6.
+#else
+          LOG(INFO)
+              << "TensorRT below 8.6 does not support an intermediate tensor "
+                 "as conv2d's filter, please upgrade your TensorRT.";
           return false;
+#endif
         }
       }
     }
diff --git a/test/ir/inference/test_trt_convert_conv2d.py b/test/ir/inference/test_trt_convert_conv2d.py
index 79099e7491bd786f3d35b51bdc751d45aeb3fb6f..432ad97f76ad29d22d597cfcc395bb716d932421 100644
--- a/test/ir/inference/test_trt_convert_conv2d.py
+++ b/test/ir/inference/test_trt_convert_conv2d.py
@@ -205,5 +205,169 @@ class TrtConvertConv2dTest(TrtLayerAutoScanTest):
         self.run_test(quant=True)
 
 
+class TrtConvertConv2dNotPersistableTest(TrtLayerAutoScanTest):
+    """Cover conv2d whose Filter is a non-persistable input (TRT >= 8.6)."""
+    def is_program_valid(self, program_config: ProgramConfig) -> bool:
+        inputs = program_config.inputs
+        attrs = [
+            program_config.ops[i].attrs for i in range(len(program_config.ops))
+        ]
+
+        if (
+            inputs['input_data'].shape[1]
+            != inputs['weight_data'].shape[1] * attrs[0]['groups']
+        ):
+            return False
+
+        ver = paddle_infer.get_trt_compile_version()
+        if ver[0] * 1000 + ver[1] * 100 + ver[2] * 10 < 8600:
+            return False
+
+        return True
+
+    def sample_program_configs(self):
+        self.trt_param.workspace_size = 1073741824
+
+        def generate_input1(attrs: List[Dict[str, Any]]):
+            return (
+                np.random.random(attrs[0]['input_shape']).astype(np.float32)
+                - 0.5
+            )
+
+        def generate_data(attrs: List[Dict[str, Any]]):
+            return (
+                np.random.random(attrs[0]['weight_shape']).astype(np.float32)
+                - 0.5
+            )
+
+        input_shapes = [[1, 32, 128, 128]]
+        ocs = [64]
+        kernel_sizes = [[3, 3]]
+        strides_options = [[2, 2]]
+        paddings_options = [[1, 1]]
+        groups_options = [1]
+        padding_algorithm_options = ['EXPLICIT']
+        dilations_options = [[1, 1]]
+        data_format_options = ['NCHW']
+
+        configurations = [
+            input_shapes,
+            ocs,
+            kernel_sizes,
+            strides_options,
+            paddings_options,
+            groups_options,
+            padding_algorithm_options,
+            dilations_options,
+            data_format_options,
+        ]
+
+        for (
+            input_shape,
+            oc,
+            kernel_size,
+            strides,
+            paddings,
+            groups,
+            padding_algorithm,
+            dilations,
+            data_format,
+        ) in itertools.product(*configurations):
+            ic = input_shape[1]
+            attrs = [
+                {
+                    "dilations": dilations,
+                    "padding_algorithm": padding_algorithm,
+                    "groups": groups,
+                    "paddings": paddings,
+                    "strides": strides,
+                    "data_format": data_format,
+                    # below attrs are used for my convenience.
+                    "input_shape": input_shape,
+                    "weight_shape": [
+                        oc,
+                        ic // groups,
+                        kernel_size[0],
+                        kernel_size[1],
+                    ],
+                },
+            ]
+
+            ops_config = [
+                {
+                    "op_type": "conv2d",
+                    "op_inputs": {
+                        "Input": ["input_data"],
+                        "Filter": ["weight_data"],
+                    },
+                    "op_outputs": {"Output": ["conv_output_data"]},
+                    "op_attrs": attrs[0],
+                },
+            ]
+
+            ops = self.generate_op_config(ops_config)
+
+            program_config = ProgramConfig(
+                ops=ops,
+                # weight_data is registered under inputs, not weights.
+                weights={},
+                inputs={
+                    "input_data": TensorConfig(
+                        data_gen=partial(generate_input1, attrs)
+                    ),
+                    "weight_data": TensorConfig(
+                        data_gen=partial(generate_data, attrs)
+                    ),
+                },
+                outputs=["conv_output_data"],
+            )
+
+            yield program_config
+
+    def sample_predictor_configs(
+        self, program_config
+    ) -> (paddle_infer.Config, List[int], float):
+        def clear_dynamic_shape():
+            self.dynamic_shape.min_input_shape = {}
+            self.dynamic_shape.max_input_shape = {}
+            self.dynamic_shape.opt_input_shape = {}
+
+        def generate_dynamic_shape(attrs):
+            self.dynamic_shape.min_input_shape = {
+                "input_data": attrs[0]["input_shape"],
+                "weight_data": attrs[0]["weight_shape"],
+            }
+            self.dynamic_shape.max_input_shape = {
+                "input_data": attrs[0]["input_shape"],
+                "weight_data": attrs[0]["weight_shape"],
+            }
+            self.dynamic_shape.opt_input_shape = {
+                "input_data": attrs[0]["input_shape"],
+                "weight_data": attrs[0]["weight_shape"],
+            }
+
+        def generate_trt_nodes_num(attrs, dynamic_shape):
+            return 1, 3
+
+        attrs = [
+            program_config.ops[i].attrs for i in range(len(program_config.ops))
+        ]
+
+        # for dynamic_shape
+        generate_dynamic_shape(attrs)
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, True
+        ), (1e-2, 1e-2)
+
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, True
+        ), (1e-2, 1e-2)
+
+    def test(self):
+        self.run_test()
+
+
 if __name__ == "__main__":
     unittest.main()