From 0549d4af3c41c6013901a9c584ccac5236a07779 Mon Sep 17 00:00:00 2001 From: Pei Yang Date: Tue, 8 Jun 2021 12:41:25 +0800 Subject: [PATCH] Cherry pick deconv & jetson single arch (#33387) * fix conv2d_transpose trt bugs (#33242) * fix jetson arch when compiling with single arch (#33269) --- cmake/cuda.cmake | 18 +++++++++++--- .../inference/tensorrt/convert/conv2d_op.cc | 19 +++++++++------ .../ir/inference/test_trt_conv_pass.py | 24 +++++++++++++++++++ 3 files changed, 51 insertions(+), 10 deletions(-) diff --git a/cmake/cuda.cmake b/cmake/cuda.cmake index 7f2addb02d..59c9070d1a 100644 --- a/cmake/cuda.cmake +++ b/cmake/cuda.cmake @@ -95,11 +95,23 @@ function(select_nvcc_arch_flags out_variable) if(${CUDA_ARCH_NAME} STREQUAL "Kepler") set(cuda_arch_bin "30 35") elseif(${CUDA_ARCH_NAME} STREQUAL "Maxwell") - set(cuda_arch_bin "50") + if (WITH_NV_JETSON) + set(cuda_arch_bin "53") + else() + set(cuda_arch_bin "50") + endif() elseif(${CUDA_ARCH_NAME} STREQUAL "Pascal") - set(cuda_arch_bin "60 61") + if (WITH_NV_JETSON) + set(cuda_arch_bin "62") + else() + set(cuda_arch_bin "60 61") + endif() elseif(${CUDA_ARCH_NAME} STREQUAL "Volta") - set(cuda_arch_bin "70") + if (WITH_NV_JETSON) + set(cuda_arch_bin "72") + else() + set(cuda_arch_bin "70") + endif() elseif(${CUDA_ARCH_NAME} STREQUAL "Turing") set(cuda_arch_bin "75") elseif(${CUDA_ARCH_NAME} STREQUAL "Ampere") diff --git a/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc b/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc index 61199724bc..6bbda6bb29 100644 --- a/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc @@ -103,11 +103,18 @@ void ConvertConv2d(TensorRTEngine* engine, const framework::proto::OpDesc& op, TensorRTEngine::Weight bias{nvinfer1::DataType::kFLOAT, static_cast<void*>(bias_data), bias_size}; - auto* layer = fadd_layer(const_cast<nvinfer1::ITensor*>(X), n_output, n_input, - nv_ksize, weight, bias); - PADDLE_ENFORCE_NOT_NULL(layer, - 
platform::errors::Fatal("TensorRT create conv2d" - " layer error.")); + // In conv2d_transpose and depthwise_conv2d_transpose, + // output channels = filter_dims[1] * groups + auto* layer = (op_desc.Type() == "conv2d_transpose" || + op_desc.Type() == "depthwise_conv2d_transpose") + ? fadd_layer(const_cast<nvinfer1::ITensor*>(X), + n_input * groups, nv_ksize, weight, bias) + : fadd_layer(const_cast<nvinfer1::ITensor*>(X), n_output, + nv_ksize, weight, bias); + + PADDLE_ENFORCE_NOT_NULL( + layer, platform::errors::Fatal("TensorRT create conv2d/conv2d_transpose" + " layer failed.")); layer->setStride(nv_strides); layer->setPadding(nv_paddings); layer->setNbGroups(groups); @@ -134,7 +141,6 @@ class Conv2dOpConverter : public OpConverter { ConvertConv2d( engine_, op, scope, test_mode, [&](nvinfer1::ITensor* inputs, int n_output, /* Conv output maps */ - int n_input, /* Conv input maps */ nvinfer1::DimsHW& ksize, TensorRTEngine::Weight& weight, TensorRTEngine::Weight& bias) -> nvinfer1::IConvolutionLayer* { auto* layer = @@ -156,7 +162,6 @@ class Deconv2dOpConverter : public OpConverter { ConvertConv2d( engine_, op, scope, test_mode, [&](nvinfer1::ITensor* inputs, int n_output, /* Deconv input maps */ - int n_input, /* Deconv output maps */ nvinfer1::DimsHW& ksize, TensorRTEngine::Weight& weight, TensorRTEngine::Weight& bias) -> nvinfer1::IDeconvolutionLayer* { auto* layer = diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_conv_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_conv_pass.py index adbb89523a..ebbf724d0b 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_conv_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_conv_pass.py @@ -36,6 +36,7 @@ class TensorRTSubgraphPassConvTest(InferencePassTest): groups=self.conv_groups, padding=self.conv_padding, bias_attr=False, + use_cudnn=self.use_cudnn, act=None) self.feeds = { "data": np.random.random([1, 6, 64, 64]).astype("float32"), @@ -50,6 +51,7 @@ class 
TensorRTSubgraphPassConvTest(InferencePassTest): self.conv_filter_size = 6 self.conv_groups = 3 self.conv_padding = [1, 1] + self.use_cudnn = True def test_check_output(self): if core.is_compiled_with_cuda(): @@ -65,6 +67,7 @@ class TensorRTSubgraphPassConvValidPaddingTest(TensorRTSubgraphPassConvTest): self.conv_filter_size = 6 self.conv_groups = 3 self.conv_padding = 'VALID' + self.use_cudnn = True class TensorRTSubgraphPassConvSamePaddingTest(InferencePassTest): @@ -73,6 +76,7 @@ class TensorRTSubgraphPassConvSamePaddingTest(InferencePassTest): self.conv_filter_size = 6 self.conv_groups = 3 self.conv_padding = 'SAME' + self.use_cudnn = True class TensorRTSubgraphPassDepthwiseConvTest(TensorRTSubgraphPassConvTest): @@ -81,6 +85,16 @@ class TensorRTSubgraphPassDepthwiseConvTest(TensorRTSubgraphPassConvTest): self.conv_filter_size = 6 self.conv_groups = 6 self.conv_padding = [1, 1] + self.use_cudnn = False + + +class TensorRTSubgraphPassDepthwiseConv2Test(TensorRTSubgraphPassConvTest): + def set_params(self): + self.conv_num_filters = 12 + self.conv_filter_size = 6 + self.conv_groups = 6 + self.conv_padding = [1, 1] + self.use_cudnn = False class TensorRTSubgraphPassConvTransposeTest(InferencePassTest): @@ -151,6 +165,16 @@ class TensorRTSubgraphPassConvTransposeMultiGroupTest( self.use_cudnn = True +class TensorRTSubgraphPassConvTranspose2Test( + TensorRTSubgraphPassConvTransposeTest): + def set_params(self): + self.conv_num_filters = 12 + self.conv_filter_size = 4 + self.conv_groups = 6 + self.conv_padding = [1, 1] + self.use_cudnn = False + + class TensorRTSubgraphPassDepthwiseConvTransposeTest( TensorRTSubgraphPassConvTransposeTest): def set_params(self): -- GitLab