未验证 提交 0549d4af 作者: Pei Yang 提交者: GitHub

Cherry pick deconv & jetson single arch (#33387)

* fix conv2d_transpose trt bugs (#33242)

* fix jetson arch when compiling with single arch (#33269)
上级 ccabafa6
......@@ -95,11 +95,23 @@ function(select_nvcc_arch_flags out_variable)
if(${CUDA_ARCH_NAME} STREQUAL "Kepler")
set(cuda_arch_bin "30 35")
elseif(${CUDA_ARCH_NAME} STREQUAL "Maxwell")
set(cuda_arch_bin "50")
if (WITH_NV_JETSON)
set(cuda_arch_bin "53")
else()
set(cuda_arch_bin "50")
endif()
elseif(${CUDA_ARCH_NAME} STREQUAL "Pascal")
set(cuda_arch_bin "60 61")
if (WITH_NV_JETSON)
set(cuda_arch_bin "62")
else()
set(cuda_arch_bin "60 61")
endif()
elseif(${CUDA_ARCH_NAME} STREQUAL "Volta")
set(cuda_arch_bin "70")
if (WITH_NV_JETSON)
set(cuda_arch_bin "72")
else()
set(cuda_arch_bin "70")
endif()
elseif(${CUDA_ARCH_NAME} STREQUAL "Turing")
set(cuda_arch_bin "75")
elseif(${CUDA_ARCH_NAME} STREQUAL "Ampere")
......
......@@ -103,11 +103,18 @@ void ConvertConv2d(TensorRTEngine* engine, const framework::proto::OpDesc& op,
TensorRTEngine::Weight bias{nvinfer1::DataType::kFLOAT,
static_cast<void*>(bias_data), bias_size};
auto* layer = fadd_layer(const_cast<nvinfer1::ITensor*>(X), n_output, n_input,
nv_ksize, weight, bias);
PADDLE_ENFORCE_NOT_NULL(layer,
platform::errors::Fatal("TensorRT create conv2d"
" layer error."));
// In conv2d_transpose and depthwise_conv2d_transpose,
// output channels = filter_dims[1] * groups
auto* layer = (op_desc.Type() == "conv2d_transpose" ||
op_desc.Type() == "depthwise_conv2d_transpose")
? fadd_layer(const_cast<nvinfer1::ITensor*>(X),
n_input * groups, nv_ksize, weight, bias)
: fadd_layer(const_cast<nvinfer1::ITensor*>(X), n_output,
nv_ksize, weight, bias);
PADDLE_ENFORCE_NOT_NULL(
layer, platform::errors::Fatal("TensorRT create conv2d/conv2d_transpose"
" layer failed."));
layer->setStride(nv_strides);
layer->setPadding(nv_paddings);
layer->setNbGroups(groups);
......@@ -134,7 +141,6 @@ class Conv2dOpConverter : public OpConverter {
ConvertConv2d(
engine_, op, scope, test_mode,
[&](nvinfer1::ITensor* inputs, int n_output, /* Conv output maps */
int n_input, /* Conv input maps */
nvinfer1::DimsHW& ksize, TensorRTEngine::Weight& weight,
TensorRTEngine::Weight& bias) -> nvinfer1::IConvolutionLayer* {
auto* layer =
......@@ -156,7 +162,6 @@ class Deconv2dOpConverter : public OpConverter {
ConvertConv2d(
engine_, op, scope, test_mode,
[&](nvinfer1::ITensor* inputs, int n_output, /* Deconv input maps */
int n_input, /* Deconv output maps */
nvinfer1::DimsHW& ksize, TensorRTEngine::Weight& weight,
TensorRTEngine::Weight& bias) -> nvinfer1::IDeconvolutionLayer* {
auto* layer =
......
......@@ -36,6 +36,7 @@ class TensorRTSubgraphPassConvTest(InferencePassTest):
groups=self.conv_groups,
padding=self.conv_padding,
bias_attr=False,
use_cudnn=self.use_cudnn,
act=None)
self.feeds = {
"data": np.random.random([1, 6, 64, 64]).astype("float32"),
......@@ -50,6 +51,7 @@ class TensorRTSubgraphPassConvTest(InferencePassTest):
self.conv_filter_size = 6
self.conv_groups = 3
self.conv_padding = [1, 1]
self.use_cudnn = True
def test_check_output(self):
if core.is_compiled_with_cuda():
......@@ -65,6 +67,7 @@ class TensorRTSubgraphPassConvValidPaddingTest(TensorRTSubgraphPassConvTest):
self.conv_filter_size = 6
self.conv_groups = 3
self.conv_padding = 'VALID'
self.use_cudnn = True
class TensorRTSubgraphPassConvSamePaddingTest(InferencePassTest):
......@@ -73,6 +76,7 @@ class TensorRTSubgraphPassConvSamePaddingTest(InferencePassTest):
self.conv_filter_size = 6
self.conv_groups = 3
self.conv_padding = 'SAME'
self.use_cudnn = True
class TensorRTSubgraphPassDepthwiseConvTest(TensorRTSubgraphPassConvTest):
......@@ -81,6 +85,16 @@ class TensorRTSubgraphPassDepthwiseConvTest(TensorRTSubgraphPassConvTest):
self.conv_filter_size = 6
self.conv_groups = 6
self.conv_padding = [1, 1]
self.use_cudnn = False
class TensorRTSubgraphPassDepthwiseConv2Test(TensorRTSubgraphPassConvTest):
def set_params(self):
self.conv_num_filters = 12
self.conv_filter_size = 6
self.conv_groups = 6
self.conv_padding = [1, 1]
self.use_cudnn = False
class TensorRTSubgraphPassConvTransposeTest(InferencePassTest):
......@@ -151,6 +165,16 @@ class TensorRTSubgraphPassConvTransposeMultiGroupTest(
self.use_cudnn = True
class TensorRTSubgraphPassConvTranspose2Test(
TensorRTSubgraphPassConvTransposeTest):
def set_params(self):
self.conv_num_filters = 12
self.conv_filter_size = 4
self.conv_groups = 6
self.conv_padding = [1, 1]
self.use_cudnn = False
class TensorRTSubgraphPassDepthwiseConvTransposeTest(
TensorRTSubgraphPassConvTransposeTest):
def set_params(self):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册