Unverified commit a64bea0c, authored by Shang Zhizhou, committed via GitHub

fix trt plugin clone and initialize bugs in TRT7.1+ (#30709) (#30822)

Co-authored-by: tianshuo78520a <707759223@qq.com>
Parent commit d199edd8
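The hunks below all apply the same lifecycle pattern, so a condensed sketch may help before reading the per-plugin diffs. The class and member names in the sketch are illustrative only, not the actual Paddle types: initialize() is made idempotent, clone() hands the new object the GPU buffers the source plugin has already allocated (via a shareGPUData-style helper) instead of re-allocating them, and device memory is released in terminate() rather than in the destructor.

#include <cuda_runtime.h>
#include <utility>
#include <vector>

// Minimal sketch (illustrative names, not the Paddle classes below) of the
// pattern this commit applies: a clone reuses the device buffer the source
// plugin already allocated, and initialize() is guarded so repeated calls
// from TensorRT do not allocate again. Error handling is omitted for brevity.
class PluginSketch {
 public:
  explicit PluginSketch(std::vector<float> weight) : weight_(std::move(weight)) {}

  int initialize() {
    if (is_initialized_) return 0;  // idempotent: later calls are no-ops
    cudaMalloc(reinterpret_cast<void **>(&weight_gpu_),
               weight_.size() * sizeof(float));
    cudaMemcpy(weight_gpu_, weight_.data(), weight_.size() * sizeof(float),
               cudaMemcpyHostToDevice);
    is_initialized_ = true;
    return 0;
  }

  PluginSketch *clone() const {
    auto *ptr = new PluginSketch(weight_);
    ptr->shareGPUData(this);  // reuse the existing device buffer, if any
    return ptr;
  }

 private:
  void shareGPUData(const PluginSketch *another) {
    if (!another->is_initialized_) return;  // nothing to share yet
    weight_gpu_ = another->weight_gpu_;
    is_initialized_ = true;
  }

  std::vector<float> weight_;
  float *weight_gpu_{nullptr};
  bool is_initialized_{false};
};

The early return when the source is not yet initialized mirrors the guard in EmbEltwiseLayernormPluginDynamicImpl<T>::shareGPUData below; the companion changes to PReluPlugin and SkipLayerNormPluginDynamic move cudaFree from the destructors into terminate(), presumably so that destroying a clone that shares a buffer does not free memory the engine still uses.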
......@@ -39,8 +39,27 @@ EmbEltwiseLayernormPluginDynamicImpl<
inline half fp32tofp16(float x) { return static_cast<half>(x); }
template <typename T>
void EmbEltwiseLayernormPluginDynamicImpl<T>::shareGPUData(
const EmbEltwiseLayernormPluginDynamicImplBase *another) {
auto *ptr =
dynamic_cast<const EmbEltwiseLayernormPluginDynamicImpl<T> *>(another);
if (!ptr->is_initialized_) {
return;
}
embs_gpu_ = ptr->embs_gpu_;
scale_gpu_ = ptr->scale_gpu_;
bias_gpu_ = ptr->bias_gpu_;
int input_num = embs_.size();
in_ptr_tensor_.Resize({input_num});
emb_ptr_tensor_.ShareDataWith(ptr->emb_ptr_tensor_);
}
template <typename T>
int EmbEltwiseLayernormPluginDynamicImpl<T>::initialize() {
if (is_initialized_) {
return 0;
}
embs_gpu_.resize(embs_.size());
for (int i = 0; i < embs_.size(); i++) {
if (embs_[i]) {
......@@ -77,13 +96,12 @@ int EmbEltwiseLayernormPluginDynamicImpl<T>::initialize() {
int input_num = embs_.size();
in_ptr_tensor_.Resize({input_num});
emb_ptr_tensor_.Resize({input_num});
cudaGetDevice(&device_id_);
auto emb_ptr_gpu_d =
emb_ptr_tensor_.mutable_data<int64_t>(platform::CUDAPlace(device_id_));
cudaMemcpy(emb_ptr_gpu_d, embs_gpu_.data(), sizeof(uintptr_t) * input_num,
cudaMemcpyHostToDevice);
is_initialized_ = true;
return 0;
}
......
......@@ -39,6 +39,8 @@ class EmbEltwiseLayernormPluginDynamicImplBase {
const nvinfer1::PluginTensorDesc* outputDesc,
const void* const* inputs, void* const* outputs,
void* workspace, cudaStream_t stream) = 0;
virtual void shareGPUData(
const EmbEltwiseLayernormPluginDynamicImplBase* another) = 0;
};
template <typename T>
......@@ -67,6 +69,7 @@ class EmbEltwiseLayernormPluginDynamicImpl
const nvinfer1::PluginTensorDesc* outputDesc,
const void* const* inputs, void* const* outputs, void* workspace,
cudaStream_t stream);
void shareGPUData(const EmbEltwiseLayernormPluginDynamicImplBase* another);
private:
std::vector<float*> embs_;
......@@ -87,6 +90,7 @@ class EmbEltwiseLayernormPluginDynamicImpl
framework::Tensor in_ptr_tensor_, emb_ptr_tensor_;
int device_id_{0};
uintptr_t old_input_ptr_{0};
bool is_initialized_{false};
};
class EmbEltwiseLayernormPluginDynamic : public DynamicPluginTensorRT {
......@@ -189,6 +193,7 @@ class EmbEltwiseLayernormPluginDynamic : public DynamicPluginTensorRT {
auto ptr = new EmbEltwiseLayernormPluginDynamic(
embs_, bias_, scale_, emb_sizes_, bias_size_, scale_size_, hidden_size_,
eps_, with_fp16_);
ptr->shareGPUData(this);
return ptr;
}
......@@ -295,6 +300,10 @@ class EmbEltwiseLayernormPluginDynamic : public DynamicPluginTensorRT {
bool own_host_buff_{false};
EmbEltwiseLayernormPluginDynamicImplBase* impl_{nullptr};
void shareGPUData(const EmbEltwiseLayernormPluginDynamic* another) {
impl_->shareGPUData(another->impl_);
}
};
class EmbEltwiseLayernormPluginV2Creator : public nvinfer1::IPluginCreator {
......
......@@ -47,13 +47,7 @@ InstanceNormPlugin *CreateInstanceNormPluginDeserialize(const void *buffer,
REGISTER_TRT_PLUGIN("instance_norm_plugin",
CreateInstanceNormPluginDeserialize);
int InstanceNormPlugin::initialize() {
platform::dynload::cudnnCreate(&handle_);
platform::dynload::cudnnCreateTensorDescriptor(&x_desc_);
platform::dynload::cudnnCreateTensorDescriptor(&y_desc_);
platform::dynload::cudnnCreateTensorDescriptor(&b_desc_);
return 0;
}
int InstanceNormPlugin::initialize() { return 0; }
nvinfer1::Dims InstanceNormPlugin::getOutputDimensions(
int index, const nvinfer1::Dims *inputDims, int nbInputs) {
......
......@@ -65,6 +65,10 @@ class InstanceNormPlugin : public PluginTensorRT {
"The instanceNorm's scale and bias should be the "
"same size. Got scale size = %d, but bias size = %d",
scale.size(), bias.size()));
platform::dynload::cudnnCreate(&handle_);
platform::dynload::cudnnCreateTensorDescriptor(&x_desc_);
platform::dynload::cudnnCreateTensorDescriptor(&y_desc_);
platform::dynload::cudnnCreateTensorDescriptor(&b_desc_);
}
// This constructor is used for TensorRT deserialization.
......@@ -74,9 +78,19 @@ class InstanceNormPlugin : public PluginTensorRT {
DeserializeValue(&serialData, &serialLength, &eps_);
DeserializeValue(&serialData, &serialLength, &scale_);
DeserializeValue(&serialData, &serialLength, &bias_);
platform::dynload::cudnnCreate(&handle_);
platform::dynload::cudnnCreateTensorDescriptor(&x_desc_);
platform::dynload::cudnnCreateTensorDescriptor(&y_desc_);
platform::dynload::cudnnCreateTensorDescriptor(&b_desc_);
}
~InstanceNormPlugin() {}
~InstanceNormPlugin() {
platform::dynload::cudnnDestroy(handle_);
platform::dynload::cudnnDestroyTensorDescriptor(x_desc_);
platform::dynload::cudnnDestroyTensorDescriptor(y_desc_);
platform::dynload::cudnnDestroyTensorDescriptor(b_desc_);
}
int initialize() override;
InstanceNormPlugin *clone() const override {
......
......@@ -39,6 +39,13 @@ int PReluPlugin::initialize() {
return 0;
}
void PReluPlugin::terminate() {
if (p_gpu_weight_) {
cudaFree(p_gpu_weight_);
p_gpu_weight_ = nullptr;
}
}
nvinfer1::Dims PReluPlugin::getOutputDimensions(int index,
const nvinfer1::Dims *inputDims,
int nbInputs) {
......
......@@ -66,11 +66,14 @@ class PReluPlugin : public PluginTensorRT {
DeserializeValue(&serialData, &serialLength, &prelu_mode);
mode_ = std::string(prelu_mode);
}
~PReluPlugin() { cudaFree(p_gpu_weight_); }
~PReluPlugin() {}
int initialize() override;
void terminate() override;
PReluPlugin* clone() const override {
return new PReluPlugin(weight_.data(), weight_.size(), mode_);
auto* ptr = new PReluPlugin(weight_.data(), weight_.size(), mode_);
ptr->p_gpu_weight_ = p_gpu_weight_;
return ptr;
}
const char* getPluginType() const override { return "prelu_plugin"; }
......@@ -100,7 +103,7 @@ class PReluPluginDynamic : public DynamicPluginTensorRT {
DeserializeValue(&serialData, &serialLength, &prelu_mode);
mode_ = std::string(prelu_mode);
}
~PReluPluginDynamic() { cudaFree(p_gpu_weight_); }
~PReluPluginDynamic() {}
nvinfer1::IPluginV2DynamicExt* clone() const override {
auto ptr = new PReluPluginDynamic(weight_.data(), weight_.size(), mode_);
ptr->p_gpu_weight_ = p_gpu_weight_;
......
......@@ -40,6 +40,17 @@ int SkipLayerNormPluginDynamic::initialize() {
return 0;
}
void SkipLayerNormPluginDynamic::terminate() {
if (bias_gpu_) {
cudaFree(bias_gpu_);
bias_gpu_ = nullptr;
}
if (scale_gpu_) {
cudaFree(scale_gpu_);
scale_gpu_ = nullptr;
}
}
nvinfer1::DimsExprs SkipLayerNormPluginDynamic::getOutputDimensions(
int output_index, const nvinfer1::DimsExprs *inputs, int nb_inputs,
nvinfer1::IExprBuilder &expr_builder) {
......
......@@ -104,13 +104,14 @@ class SkipLayerNormPluginDynamic : public DynamicPluginTensorRT {
int nb_inputs) const override;
void destroy() override { delete this; }
void terminate() override;
private:
std::vector<float> bias_;
std::vector<float> scale_;
float* bias_gpu_;
float* scale_gpu_;
float* bias_gpu_{nullptr};
float* scale_gpu_{nullptr};
int bias_size_;
int scale_size_;
......
......@@ -62,6 +62,16 @@ nvinfer1::Dims SplitPlugin::getOutputDimensions(
return output_dims;
}
void SplitPlugin::shareData(const SplitPlugin* another) {
outer_rows_ = another->outer_rows_;
inner_cols_ = another->inner_cols_;
same_shape_ = another->same_shape_;
axis_shape_ = another->axis_shape_;
d_segment_offsets_ = another->d_segment_offsets_;
segment_offsets_ = another->segment_offsets_;
d_output_ptrs_.resize(another->d_output_ptrs_.size(), nullptr);
}
int SplitPlugin::initialize() {
PADDLE_ENFORCE_LE(axis_, nvinfer1::Dims::MAX_DIMS,
platform::errors::InvalidArgument(
......@@ -93,6 +103,9 @@ int SplitPlugin::initialize() {
return 0;
}
// Nothing to release here corresponding to initialize().
void SplitPlugin::terminate() {}
// The following part of the code refers to onnx-tensorrt
// https://github.com/onnx/onnx-tensorrt/blob/master/Split.cu
template <typename T>
......
......@@ -40,7 +40,9 @@ class SplitPlugin : public PluginTensorRT {
}
SplitPlugin* clone() const override {
return new SplitPlugin(axis_, output_length_, with_fp16_);
auto* ptr = new SplitPlugin(axis_, output_length_, with_fp16_);
ptr->shareData(this);
return ptr;
}
const char* getPluginType() const override { return "split_plugin"; }
......@@ -50,6 +52,7 @@ class SplitPlugin : public PluginTensorRT {
int num_inputs) override;
int initialize() override;
void terminate() override;
int enqueue(int batchSize, const void* const* inputs, void** outputs,
void* workspace, cudaStream_t stream) override;
......@@ -75,6 +78,9 @@ class SplitPlugin : public PluginTensorRT {
std::vector<int> segment_offsets_;
thrust::device_vector<int> d_segment_offsets_;
thrust::device_vector<float*> d_output_ptrs_;
private:
void shareData(const SplitPlugin* another);
};
#if IS_TRT_VERSION_GE(6000)
......
......@@ -642,7 +642,9 @@ set_tests_properties(test_analyzer_bert PROPERTIES TIMEOUT 120)
set_tests_properties(test_analyzer_mobilenet_depthwise_conv PROPERTIES TIMEOUT 120)
if(WITH_GPU AND TENSORRT_FOUND)
set_tests_properties(trt_mobilenet_test PROPERTIES TIMEOUT 120)
set_tests_properties(test_analyzer_bfloat16_resnet50 PROPERTIES TIMEOUT 120)
if(WITH_MKLDNN)
set_tests_properties(test_analyzer_bfloat16_resnet50 PROPERTIES TIMEOUT 120)
endif()
endif()
if(ON_INFER OR WITH_GPU)
set_tests_properties(test_analyzer_transformer_profile PROPERTIES TIMEOUT 120)
......
......@@ -30,4 +30,6 @@ foreach(target ${TEST_INFERENCE_IR_PASSES})
endforeach()
if(WITH_GPU AND TENSORRT_FOUND)
set_tests_properties(test_trt_subgraph_pass PROPERTIES TIMEOUT 120)
set_tests_properties(test_trt_activation_pass PROPERTIES TIMEOUT 120)
set_tests_properties(test_trt_conv_pass PROPERTIES TIMEOUT 120)
endif()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import shutil
import unittest
import numpy as np
from inference_pass_test import InferencePassTest
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.core import PassVersionChecker
from paddle.fluid.core import AnalysisConfig
class TensorRTSubgraphPassActivationTest(InferencePassTest):
def setUpTensorRTParam(self):
self.enable_trt = True
self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam(
1 << 30, 32, 0, AnalysisConfig.Precision.Float32, False, False)
def setUp(self):
self.setUpTensorRTParam()
with fluid.program_guard(self.main_program, self.startup_program):
data = fluid.data(
name="data", shape=[-1, 6, 64, 64], dtype="float32")
act_out = self.append_act(data)
out = fluid.layers.batch_norm(act_out, is_test=True)
self.feeds = {
"data": np.random.random([1, 6, 64, 64]).astype("float32"),
}
self.fetch_list = [out]
def append_act(self, x):
return fluid.layers.relu(x)
def test_check_output(self):
if core.is_compiled_with_cuda():
use_gpu = True
if os.path.exists(self.path + "_opt_cache"):
shutil.rmtree(self.path + "_opt_cache")
if self.trt_parameters.precision == AnalysisConfig.Precision.Float32:
self.check_output_with_option(use_gpu)
else:
self.check_output_with_option(use_gpu, 1e-3)
self.assertTrue(
PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
class TensorRTSubgraphPassLeakyReluTest(TensorRTSubgraphPassActivationTest):
def append_act(self, x):
return fluid.layers.leaky_relu(x)
class TensorRTSubgraphPassRelu6Test(TensorRTSubgraphPassActivationTest):
def append_act(self, x):
return fluid.layers.relu6(x)
class TensorRTSubgraphPassSoftMaxTest(TensorRTSubgraphPassActivationTest):
def append_act(self, x):
return fluid.layers.softmax(x)
class TensorRTSubgraphPassSigmoidTest(TensorRTSubgraphPassActivationTest):
def append_act(self, x):
return fluid.layers.sigmoid(x)
class TensorRTSubgraphPassHardSwishTest(TensorRTSubgraphPassActivationTest):
def append_act(self, x):
return fluid.layers.hard_swish(x)
class TensorRTSubgraphPassHardSigmoidTest(TensorRTSubgraphPassActivationTest):
def append_act(self, x):
return fluid.layers.hard_sigmoid(x)
class TensorRTSubgraphPassHardSwishPluginTest(
TensorRTSubgraphPassActivationTest):
def append_act(self, x):
return fluid.layers.hard_swish(x, threshold=4.0, scale=8.0)
class TensorRTSubgraphPassClipTest(TensorRTSubgraphPassActivationTest):
def append_act(self, x):
return fluid.layers.clip(x, 0, 1)
class TensorRTSubgraphPassTanhTest(TensorRTSubgraphPassActivationTest):
def append_act(self, x):
return fluid.layers.tanh(x)
class TensorRTSubgraphPassSwishTest(TensorRTSubgraphPassActivationTest):
def setUpTensorRTParam(self):
self.enable_trt = True
self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam(
1 << 30, 32, 0, AnalysisConfig.Precision.Float32, True, False)
def append_act(self, x):
return fluid.layers.swish(x)
class TensorRTSubgraphPassSwishFp16SerializeTest(
TensorRTSubgraphPassActivationTest):
def setUpTensorRTParam(self):
self.enable_trt = True
self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam(
1 << 30, 32, 0, AnalysisConfig.Precision.Half, True, False)
def append_act(self, x):
return fluid.layers.swish(x)
class TensorRTSubgraphPassDynamicSwishFp16SerializeTest(
TensorRTSubgraphPassActivationTest):
def setUpTensorRTParam(self):
self.enable_trt = True
self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam(
1 << 30, 32, 0, AnalysisConfig.Precision.Half, True, False)
self.dynamic_shape_params = TensorRTSubgraphPassActivationTest.DynamicShapeParam(
{
'data': [1, 6, 8, 8]
}, {'data': [1, 6, 512, 512]}, {'data': [1, 6, 256, 256]}, False)
def append_act(self, x):
return fluid.layers.swish(x)
class TensorRTSubgraphPassPreluAllTest(TensorRTSubgraphPassActivationTest):
def append_act(self, x):
return fluid.layers.prelu(x, mode='all')
class TensorRTSubgraphPassPreluChannelTest(TensorRTSubgraphPassActivationTest):
def append_act(self, x):
return fluid.layers.prelu(x, mode='channel')
class TensorRTSubgraphPassPreluElementTest(TensorRTSubgraphPassActivationTest):
def append_act(self, x):
return fluid.layers.prelu(x, mode='element')
class TensorRTSubgraphPassGeluTest(TensorRTSubgraphPassActivationTest):
def append_act(self, x):
return fluid.layers.gelu(x)
class TensorRTSubgraphPassGeluDynamicTest(TensorRTSubgraphPassActivationTest):
def setUpTensorRTParam(self):
self.enable_trt = True
self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam(
1 << 30, 32, 0, AnalysisConfig.Precision.Float32, False, False)
self.dynamic_shape_params = TensorRTSubgraphPassActivationTest.DynamicShapeParam(
{
'data': [1, 6, 8, 8]
}, {'data': [1, 6, 512, 512]}, {'data': [1, 6, 256, 256]}, False)
def append_act(self, x):
return fluid.layers.gelu(x)
class TensorRTSubgraphPassGeluFp16Test(TensorRTSubgraphPassActivationTest):
def setUpTensorRTParam(self):
self.enable_trt = True
self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam(
1 << 30, 32, 0, AnalysisConfig.Precision.Half, False, False)
def append_act(self, x):
return fluid.layers.gelu(x)
class TensorRTSubgraphPassGeluFp16SerializeTest(
TensorRTSubgraphPassActivationTest):
def setUpTensorRTParam(self):
self.enable_trt = True
self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam(
1 << 30, 32, 0, AnalysisConfig.Precision.Half, True, False)
def append_act(self, x):
return fluid.layers.gelu(x)
class TensorRTSubgraphPassGeluFp16DynamicTest(
TensorRTSubgraphPassActivationTest):
def setUpTensorRTParam(self):
self.enable_trt = True
self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam(
1 << 30, 32, 0, AnalysisConfig.Precision.Half, False, False)
self.dynamic_shape_params = TensorRTSubgraphPassActivationTest.DynamicShapeParam(
{
'data': [1, 6, 8, 8]
}, {'data': [1, 6, 512, 512]}, {'data': [1, 6, 256, 256]}, False)
def append_act(self, x):
return fluid.layers.gelu(x)
class TensorRTSubgraphPassGeluFp16DynamicSerializeTest(
TensorRTSubgraphPassActivationTest):
def setUpTensorRTParam(self):
self.enable_trt = True
self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam(
1 << 30, 32, 0, AnalysisConfig.Precision.Half, True, False)
self.dynamic_shape_params = TensorRTSubgraphPassActivationTest.DynamicShapeParam(
{
'data': [1, 6, 8, 8]
}, {'data': [1, 6, 512, 512]}, {'data': [1, 6, 256, 256]}, False)
def append_act(self, x):
return fluid.layers.gelu(x)
if __name__ == "__main__":
unittest.main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import shutil
import unittest
import numpy as np
from inference_pass_test import InferencePassTest
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.core import PassVersionChecker
from paddle.fluid.core import AnalysisConfig
class TensorRTSubgraphPassConvTest(InferencePassTest):
def setUp(self):
self.set_params()
with fluid.program_guard(self.main_program, self.startup_program):
data = fluid.data(
name="data", shape=[-1, 6, 64, 64], dtype="float32")
conv_out = fluid.layers.conv2d(
input=data,
num_filters=self.conv_num_filters,
filter_size=self.conv_filter_size,
groups=self.conv_groups,
padding=self.conv_padding,
bias_attr=False,
act=None)
self.feeds = {
"data": np.random.random([1, 6, 64, 64]).astype("float32"),
}
self.enable_trt = True
self.trt_parameters = TensorRTSubgraphPassConvTest.TensorRTParam(
1 << 30, 32, 0, AnalysisConfig.Precision.Float32, False, False)
self.fetch_list = [conv_out]
def set_params(self):
self.conv_num_filters = 6
self.conv_filter_size = 6
self.conv_groups = 3
self.conv_padding = [1, 1]
def test_check_output(self):
if core.is_compiled_with_cuda():
use_gpu = True
self.check_output_with_option(use_gpu)
self.assertTrue(
PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
class TensorRTSubgraphPassConvValidPaddingTest(TensorRTSubgraphPassConvTest):
def set_params(self):
self.conv_num_filters = 6
self.conv_filter_size = 6
self.conv_groups = 3
self.conv_padding = 'VALID'
'''
# conv2d padded in 'SAME' mode is not yet supported in TRT, reopen this when support is complete.
class TensorRTSubgraphPassConvSamePaddingTest(InferencePassTest):
def set_params(self):
self.conv_num_filters = 6
self.conv_filter_size = 6
self.conv_groups = 3
self.conv_padding = 'SAME'
'''
class TensorRTSubgraphPassDepthwiseConvTest(TensorRTSubgraphPassConvTest):
def set_params(self):
self.conv_num_filters = 6
self.conv_filter_size = 6
self.conv_groups = 6
self.conv_padding = [1, 1]
class TensorRTSubgraphPassConvTransposeTest(InferencePassTest):
def setUp(self):
self.set_params()
with fluid.program_guard(self.main_program, self.startup_program):
data = fluid.data(
name="data", shape=[-1, 6, 64, 64], dtype="float32")
conv_out = fluid.layers.conv2d_transpose(
input=data,
num_filters=self.conv_num_filters,
filter_size=self.conv_filter_size,
groups=self.conv_groups,
padding=self.conv_padding,
bias_attr=False,
act=None)
self.feeds = {
"data": np.random.random([1, 6, 64, 64]).astype("float32"),
}
self.enable_trt = True
self.trt_parameters = TensorRTSubgraphPassConvTransposeTest.TensorRTParam(
1 << 30, 32, 0, AnalysisConfig.Precision.Float32, False, False)
self.fetch_list = [conv_out]
def set_params(self):
self.conv_num_filters = 6
self.conv_filter_size = 6
self.conv_groups = 1
self.conv_padding = [1, 1]
def test_check_output(self):
if core.is_compiled_with_cuda():
use_gpu = True
self.check_output_with_option(use_gpu)
self.assertTrue(
PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
class TensorRTSubgraphPassConvTransposeValidPaddingTest(
TensorRTSubgraphPassConvTransposeTest):
def set_params(self):
self.conv_num_filters = 6
self.conv_filter_size = 6
self.conv_groups = 1
self.conv_padding = 'VALID'
'''
# conv2d_transpose padded in 'SAME' mode is not yet supported in TRT, reopen this when support is complete.
class TensorRTSubgraphPassConvTransposeSamePaddingTest(TensorRTSubgraphPassConvTransposeTest):
def set_params(self):
self.conv_num_filters = 6
self.conv_filter_size = 6
self.conv_groups = 1
self.conv_padding = 'SAME'
'''
class TensorRTSubgraphPassDepthwiseConvTransposeTest(
TensorRTSubgraphPassConvTransposeTest):
def set_params(self):
self.conv_num_filters = 6
self.conv_filter_size = 6
self.conv_groups = 1
self.conv_padding = [1, 1]
if __name__ == "__main__":
unittest.main()
......@@ -23,134 +23,6 @@ from paddle.fluid.core import PassVersionChecker
from paddle.fluid.core import AnalysisConfig
class TensorRTSubgraphPassConvTest(InferencePassTest):
def setUp(self):
self.set_params()
with fluid.program_guard(self.main_program, self.startup_program):
data = fluid.data(
name="data", shape=[-1, 6, 64, 64], dtype="float32")
conv_out = fluid.layers.conv2d(
input=data,
num_filters=self.conv_num_filters,
filter_size=self.conv_filter_size,
groups=self.conv_groups,
padding=self.conv_padding,
bias_attr=False,
act=None)
self.feeds = {
"data": np.random.random([1, 6, 64, 64]).astype("float32"),
}
self.enable_trt = True
self.trt_parameters = TensorRTSubgraphPassConvTest.TensorRTParam(
1 << 30, 32, 0, AnalysisConfig.Precision.Float32, False, False)
self.fetch_list = [conv_out]
def set_params(self):
self.conv_num_filters = 6
self.conv_filter_size = 6
self.conv_groups = 3
self.conv_padding = [1, 1]
def test_check_output(self):
if core.is_compiled_with_cuda():
use_gpu = True
self.check_output_with_option(use_gpu)
self.assertTrue(
PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
class TensorRTSubgraphPassConvValidPaddingTest(TensorRTSubgraphPassConvTest):
def set_params(self):
self.conv_num_filters = 6
self.conv_filter_size = 6
self.conv_groups = 3
self.conv_padding = 'VALID'
'''
# conv2d padded in 'SAME' mode is not yet supported in TRT, reopen this when support is complete.
class TensorRTSubgraphPassConvSamePaddingTest(InferencePassTest):
def set_params(self):
self.conv_num_filters = 6
self.conv_filter_size = 6
self.conv_groups = 3
self.conv_padding = 'SAME'
'''
class TensorRTSubgraphPassDepthwiseConvTest(TensorRTSubgraphPassConvTest):
def set_params(self):
self.conv_num_filters = 6
self.conv_filter_size = 6
self.conv_groups = 6
self.conv_padding = [1, 1]
class TensorRTSubgraphPassConvTransposeTest(InferencePassTest):
def setUp(self):
self.set_params()
with fluid.program_guard(self.main_program, self.startup_program):
data = fluid.data(
name="data", shape=[-1, 6, 64, 64], dtype="float32")
conv_out = fluid.layers.conv2d_transpose(
input=data,
num_filters=self.conv_num_filters,
filter_size=self.conv_filter_size,
groups=self.conv_groups,
padding=self.conv_padding,
bias_attr=False,
act=None)
self.feeds = {
"data": np.random.random([1, 6, 64, 64]).astype("float32"),
}
self.enable_trt = True
self.trt_parameters = TensorRTSubgraphPassConvTransposeTest.TensorRTParam(
1 << 30, 32, 0, AnalysisConfig.Precision.Float32, False, False)
self.fetch_list = [conv_out]
def set_params(self):
self.conv_num_filters = 6
self.conv_filter_size = 6
self.conv_groups = 1
self.conv_padding = [1, 1]
def test_check_output(self):
if core.is_compiled_with_cuda():
use_gpu = True
self.check_output_with_option(use_gpu)
self.assertTrue(
PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
class TensorRTSubgraphPassConvTransposeValidPaddingTest(
TensorRTSubgraphPassConvTransposeTest):
def set_params(self):
self.conv_num_filters = 6
self.conv_filter_size = 6
self.conv_groups = 1
self.conv_padding = 'VALID'
'''
# conv2d_transpose padded in 'SAME' mode is not yet supported in TRT, reopen this when support is complete.
class TensorRTSubgraphPassConvTransposeSamePaddingTest(TensorRTSubgraphPassConvTransposeTest):
def set_params(self):
self.conv_num_filters = 6
self.conv_filter_size = 6
self.conv_groups = 1
self.conv_padding = 'SAME'
'''
class TensorRTSubgraphPassDepthwiseConvTransposeTest(
TensorRTSubgraphPassConvTransposeTest):
def set_params(self):
self.conv_num_filters = 6
self.conv_filter_size = 6
self.conv_groups = 1
self.conv_padding = [1, 1]
class TensorRTSubgraphPassFcTest(InferencePassTest):
def setUp(self):
with fluid.program_guard(self.main_program, self.startup_program):
......@@ -282,207 +154,6 @@ class TensorRTSubgraphPassValidPaddingPoolTest(InferencePassTest):
self.exclusive = False
class TensorRTSubgraphPassActivationTest(InferencePassTest):
def setUpTensorRTParam(self):
self.enable_trt = True
self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam(
1 << 30, 32, 0, AnalysisConfig.Precision.Float32, False, False)
def setUp(self):
self.setUpTensorRTParam()
with fluid.program_guard(self.main_program, self.startup_program):
data = fluid.data(
name="data", shape=[-1, 6, 64, 64], dtype="float32")
act_out = self.append_act(data)
out = fluid.layers.batch_norm(act_out, is_test=True)
self.feeds = {
"data": np.random.random([1, 6, 64, 64]).astype("float32"),
}
self.fetch_list = [out]
def append_act(self, x):
return fluid.layers.relu(x)
def test_check_output(self):
if core.is_compiled_with_cuda():
use_gpu = True
if os.path.exists(self.path + "_opt_cache"):
shutil.rmtree(self.path + "_opt_cache")
if self.trt_parameters.precision == AnalysisConfig.Precision.Float32:
self.check_output_with_option(use_gpu)
else:
self.check_output_with_option(use_gpu, 1e-3)
self.assertTrue(
PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
class TensorRTSubgraphPassLeakyReluTest(TensorRTSubgraphPassActivationTest):
def append_act(self, x):
return fluid.layers.leaky_relu(x)
class TensorRTSubgraphPassRelu6Test(TensorRTSubgraphPassActivationTest):
def append_act(self, x):
return fluid.layers.relu6(x)
class TensorRTSubgraphPassSoftMaxTest(TensorRTSubgraphPassActivationTest):
def append_act(self, x):
return fluid.layers.softmax(x)
class TensorRTSubgraphPassSigmoidTest(TensorRTSubgraphPassActivationTest):
def append_act(self, x):
return fluid.layers.sigmoid(x)
class TensorRTSubgraphPassHardSwishTest(TensorRTSubgraphPassActivationTest):
def append_act(self, x):
return fluid.layers.hard_swish(x)
class TensorRTSubgraphPassHardSwishPluginTest(
TensorRTSubgraphPassActivationTest):
def append_act(self, x):
return fluid.layers.hard_swish(x, threshold=4.0, scale=8.0)
class TensorRTSubgraphPassHardSigmoidTest(TensorRTSubgraphPassActivationTest):
def append_act(self, x):
return fluid.layers.hard_sigmoid(x)
class TensorRTSubgraphPassClipTest(TensorRTSubgraphPassActivationTest):
def append_act(self, x):
return fluid.layers.clip(x, 0, 1)
class TensorRTSubgraphPassTanhTest(TensorRTSubgraphPassActivationTest):
def append_act(self, x):
return fluid.layers.tanh(x)
class TensorRTSubgraphPassSwishTest(TensorRTSubgraphPassActivationTest):
def setUpTensorRTParam(self):
self.enable_trt = True
self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam(
1 << 30, 32, 0, AnalysisConfig.Precision.Float32, True, False)
def append_act(self, x):
return fluid.layers.swish(x)
class TensorRTSubgraphPassSwishFp16SerializeTest(
TensorRTSubgraphPassActivationTest):
def setUpTensorRTParam(self):
self.enable_trt = True
self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam(
1 << 30, 32, 0, AnalysisConfig.Precision.Half, True, False)
def append_act(self, x):
return fluid.layers.swish(x)
class TensorRTSubgraphPassDynamicSwishFp16SerializeTest(
TensorRTSubgraphPassActivationTest):
def setUpTensorRTParam(self):
self.enable_trt = True
self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam(
1 << 30, 32, 0, AnalysisConfig.Precision.Half, True, False)
self.dynamic_shape_params = TensorRTSubgraphPassActivationTest.DynamicShapeParam(
{
'data': [1, 6, 8, 8]
}, {'data': [1, 6, 512, 512]}, {'data': [1, 6, 256, 256]}, False)
def append_act(self, x):
return fluid.layers.swish(x)
class TensorRTSubgraphPassPreluAllTest(TensorRTSubgraphPassActivationTest):
def append_act(self, x):
return fluid.layers.prelu(x, mode='all')
class TensorRTSubgraphPassPreluChannelTest(TensorRTSubgraphPassActivationTest):
def append_act(self, x):
return fluid.layers.prelu(x, mode='channel')
class TensorRTSubgraphPassPreluElementTest(TensorRTSubgraphPassActivationTest):
def append_act(self, x):
return fluid.layers.prelu(x, mode='element')
class TensorRTSubgraphPassGeluTest(TensorRTSubgraphPassActivationTest):
def append_act(self, x):
return fluid.layers.gelu(x)
class TensorRTSubgraphPassGeluDynamicTest(TensorRTSubgraphPassActivationTest):
def setUpTensorRTParam(self):
self.enable_trt = True
self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam(
1 << 30, 32, 0, AnalysisConfig.Precision.Float32, False, False)
self.dynamic_shape_params = TensorRTSubgraphPassActivationTest.DynamicShapeParam(
{
'data': [1, 6, 8, 8]
}, {'data': [1, 6, 512, 512]}, {'data': [1, 6, 256, 256]}, False)
def append_act(self, x):
return fluid.layers.gelu(x)
class TensorRTSubgraphPassGeluFp16Test(TensorRTSubgraphPassActivationTest):
def setUpTensorRTParam(self):
self.enable_trt = True
self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam(
1 << 30, 32, 0, AnalysisConfig.Precision.Half, False, False)
def append_act(self, x):
return fluid.layers.gelu(x)
class TensorRTSubgraphPassGeluFp16SerializeTest(
TensorRTSubgraphPassActivationTest):
def setUpTensorRTParam(self):
self.enable_trt = True
self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam(
1 << 30, 32, 0, AnalysisConfig.Precision.Half, True, False)
def append_act(self, x):
return fluid.layers.gelu(x)
class TensorRTSubgraphPassGeluFp16DynamicTest(
TensorRTSubgraphPassActivationTest):
def setUpTensorRTParam(self):
self.enable_trt = True
self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam(
1 << 30, 32, 0, AnalysisConfig.Precision.Half, False, False)
self.dynamic_shape_params = TensorRTSubgraphPassActivationTest.DynamicShapeParam(
{
'data': [1, 6, 8, 8]
}, {'data': [1, 6, 512, 512]}, {'data': [1, 6, 256, 256]}, False)
def append_act(self, x):
return fluid.layers.gelu(x)
class TensorRTSubgraphPassGeluFp16DynamicSerializeTest(
TensorRTSubgraphPassActivationTest):
def setUpTensorRTParam(self):
self.enable_trt = True
self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam(
1 << 30, 32, 0, AnalysisConfig.Precision.Half, True, False)
self.dynamic_shape_params = TensorRTSubgraphPassActivationTest.DynamicShapeParam(
{
'data': [1, 6, 8, 8]
}, {'data': [1, 6, 512, 512]}, {'data': [1, 6, 256, 256]}, False)
def append_act(self, x):
return fluid.layers.gelu(x)
class TensorRTSubgraphPassConcatTest(InferencePassTest):
def setUp(self):
with fluid.program_guard(self.main_program, self.startup_program):
......@@ -570,7 +241,7 @@ class TensorRTSubgraphPassDynamicSplitFp16SerializeTest(InferencePassTest):
self.enable_trt = True
self.trt_parameters = TensorRTSubgraphPassSplitTest.TensorRTParam(
1 << 30, 32, 0, AnalysisConfig.Precision.Half, True, False)
self.dynamic_shape_params = TensorRTSubgraphPassActivationTest.DynamicShapeParam(
self.dynamic_shape_params = TensorRTSubgraphPassDynamicSplitFp16SerializeTest.DynamicShapeParam(
{
'data': [1, 3, 8, 64]
}, {'data': [1, 3, 512, 64]}, {'data': [1, 3, 256, 64]}, False)
......
......@@ -130,6 +130,12 @@ function build_cpython {
function build_cpythons {
for py_ver in $@; do
if [ ${py_ver} == "2.7.15" ]; then
GET_PIP_URL="https://bootstrap.pypa.io/2.7/get-pip.py"
elif [ ${py_ver} == "3.5.1" ] ;then
GET_PIP_URL="https://bootstrap.pypa.io/3.5/get-pip.py"
fi
check_var $GET_PIP_URL
curl -sLO $GET_PIP_URL
build_cpython $py_ver
......
......@@ -41,9 +41,9 @@ function make_centos_dockerfile(){
sed "s/<baseimg>/11.0-cudnn8-devel-centos7/g" Dockerfile.centos >${dockerfile_name}
sed -i "s#COPY build_scripts /build_scripts#COPY tools/dockerfile/build_scripts ./build_scripts#g" ${dockerfile_name}
dockerfile_line=$(wc -l ${dockerfile_name}|awk '{print $1}')
sed -i "${dockerfile_line}i RUN rm -f /usr/bin/cc && ln -s /usr/local/gcc-8.2/bin/gcc /usr/bin/cc" ${dockerfile_name}
sed -i "${dockerfile_line}i RUN ln -s /usr/lib64/libz.so /usr/local/lib/libz.so \\
RUN ln -s /usr/local/lib/libnccl.so /usr/local/cuda/lib64/ \\
RUN rm -rf /usr/include/NvInfer*" ${dockerfile_name}
RUN ln -s /usr/local/lib/libnccl.so /usr/local/cuda/lib64/" ${dockerfile_name}
sed -i $"${dockerfile_line}i RUN wget --no-check-certificate -q https://paddle-edl.bj.bcebos.com/hadoop-2.7.7.tar.gz \\
RUN tar -xzf hadoop-2.7.7.tar.gz && mv hadoop-2.7.7 /usr/local/" ${dockerfile_name}
sed -i "s#RUN bash build_scripts/build.sh#RUN bash build_scripts/install_gcc.sh gcc82 \nRUN mv /usr/bin/cc /usr/bin/cc.bak \&\& ln -s /usr/local/gcc-8.2/bin/gcc /usr/bin/cc \nENV PATH=/usr/local/gcc-8.2/bin:\$PATH \nRUN bash build_scripts/build.sh#g" ${dockerfile_name}
......