From a64bea0cf8d0c2a9e72d8f519ab29bedcaff3bf4 Mon Sep 17 00:00:00 2001 From: Shang Zhizhou Date: Fri, 5 Feb 2021 14:06:42 +0800 Subject: [PATCH] fix trt plugin clone and initialize bugs in TRT7.1+ (#30709) (#30822) Co-authored-by: tianshuo78520a <707759223@qq.com> --- .../plugin/emb_eltwise_layernorm_plugin.cu | 22 +- .../plugin/emb_eltwise_layernorm_plugin.h | 9 + .../plugin/instance_norm_op_plugin.cu | 8 +- .../tensorrt/plugin/instance_norm_op_plugin.h | 16 +- .../tensorrt/plugin/prelu_op_plugin.cu | 7 + .../tensorrt/plugin/prelu_op_plugin.h | 9 +- .../plugin/skip_layernorm_op_plugin.cu | 11 + .../plugin/skip_layernorm_op_plugin.h | 5 +- .../tensorrt/plugin/split_op_plugin.cu | 13 + .../tensorrt/plugin/split_op_plugin.h | 8 +- .../fluid/inference/tests/api/CMakeLists.txt | 4 +- .../unittests/ir/inference/CMakeLists.txt | 2 + .../ir/inference/test_trt_activation_pass.py | 228 ++++++++++++ .../ir/inference/test_trt_conv_pass.py | 155 ++++++++ .../ir/inference/test_trt_subgraph_pass.py | 331 +----------------- tools/dockerfile/build_scripts/build_utils.sh | 6 + tools/dockerfile/ci_dockerfile.sh | 4 +- 17 files changed, 489 insertions(+), 349 deletions(-) create mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_trt_activation_pass.py create mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_trt_conv_pass.py diff --git a/paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.cu index 30667514ac8..238daa4a886 100644 --- a/paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.cu @@ -39,8 +39,27 @@ EmbEltwiseLayernormPluginDynamicImpl< inline half fp32tofp16(float x) { return static_cast(x); } +template +void EmbEltwiseLayernormPluginDynamicImpl::shareGPUData( + const EmbEltwiseLayernormPluginDynamicImplBase *anthor) { + auto *ptr = + dynamic_cast *>(anthor); + if (!ptr->is_initialized_) { + return; + } + embs_gpu_ = ptr->embs_gpu_; + scale_gpu_ = ptr->scale_gpu_; + bias_gpu_ = ptr->bias_gpu_; + int input_num = embs_.size(); + in_ptr_tensor_.Resize({input_num}); + emb_ptr_tensor_.ShareDataWith(ptr->emb_ptr_tensor_); +} + template int EmbEltwiseLayernormPluginDynamicImpl::initialize() { + if (is_initialized_) { + return 0; + } embs_gpu_.resize(embs_.size()); for (int i = 0; i < embs_.size(); i++) { if (embs_[i]) { @@ -77,13 +96,12 @@ int EmbEltwiseLayernormPluginDynamicImpl::initialize() { int input_num = embs_.size(); in_ptr_tensor_.Resize({input_num}); emb_ptr_tensor_.Resize({input_num}); - cudaGetDevice(&device_id_); auto emb_ptr_gpu_d = emb_ptr_tensor_.mutable_data(platform::CUDAPlace(device_id_)); cudaMemcpy(emb_ptr_gpu_d, embs_gpu_.data(), sizeof(uintptr_t) * input_num, cudaMemcpyHostToDevice); - + is_initialized_ = true; return 0; } diff --git a/paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.h b/paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.h index fcba85daf9f..6c8381a750c 100644 --- a/paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.h +++ b/paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.h @@ -39,6 +39,8 @@ class EmbEltwiseLayernormPluginDynamicImplBase { const nvinfer1::PluginTensorDesc* outputDesc, const void* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) = 0; + virtual void shareGPUData( + const EmbEltwiseLayernormPluginDynamicImplBase* anthor) = 0; }; template @@ -67,6 +69,7 @@ class EmbEltwiseLayernormPluginDynamicImpl const nvinfer1::PluginTensorDesc* outputDesc, const void* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream); + void shareGPUData(const EmbEltwiseLayernormPluginDynamicImplBase* anthor); private: std::vector embs_; @@ -87,6 +90,7 @@ class EmbEltwiseLayernormPluginDynamicImpl framework::Tensor in_ptr_tensor_, emb_ptr_tensor_; int device_id_{0}; uintptr_t old_input_ptr_{0}; + bool is_initialized_{false}; }; class EmbEltwiseLayernormPluginDynamic : public DynamicPluginTensorRT { @@ -189,6 +193,7 @@ class EmbEltwiseLayernormPluginDynamic : public DynamicPluginTensorRT { auto ptr = new EmbEltwiseLayernormPluginDynamic( embs_, bias_, scale_, emb_sizes_, bias_size_, scale_size_, hidden_size_, eps_, with_fp16_); + ptr->shareGPUData(this); return ptr; } @@ -295,6 +300,10 @@ class EmbEltwiseLayernormPluginDynamic : public DynamicPluginTensorRT { bool own_host_buff_{false}; EmbEltwiseLayernormPluginDynamicImplBase* impl_{nullptr}; + + void shareGPUData(const EmbEltwiseLayernormPluginDynamic* anthor) { + impl_->shareGPUData(anthor->impl_); + } }; class EmbEltwiseLayernormPluginV2Creator : public nvinfer1::IPluginCreator { diff --git a/paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.cu index a22714aa92f..a579743ee8a 100644 --- a/paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.cu @@ -47,13 +47,7 @@ InstanceNormPlugin *CreateInstanceNormPluginDeserialize(const void *buffer, REGISTER_TRT_PLUGIN("instance_norm_plugin", CreateInstanceNormPluginDeserialize); -int InstanceNormPlugin::initialize() { - platform::dynload::cudnnCreate(&handle_); - platform::dynload::cudnnCreateTensorDescriptor(&x_desc_); - platform::dynload::cudnnCreateTensorDescriptor(&y_desc_); - platform::dynload::cudnnCreateTensorDescriptor(&b_desc_); - return 0; -} +int InstanceNormPlugin::initialize() { return 0; } nvinfer1::Dims InstanceNormPlugin::getOutputDimensions( int index, const nvinfer1::Dims *inputDims, int nbInputs) { diff --git a/paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.h index ec1d8e6517e..83422708f59 100644 --- a/paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.h +++ b/paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.h @@ -65,6 +65,10 @@ class InstanceNormPlugin : public PluginTensorRT { "The instanceNorm's scale and bias should be the " "same size. Got scale size = %d, but bias size = %d", scale.size(), bias.size())); + platform::dynload::cudnnCreate(&handle_); + platform::dynload::cudnnCreateTensorDescriptor(&x_desc_); + platform::dynload::cudnnCreateTensorDescriptor(&y_desc_); + platform::dynload::cudnnCreateTensorDescriptor(&b_desc_); } // It was used for tensorrt deserialization. @@ -74,9 +78,19 @@ class InstanceNormPlugin : public PluginTensorRT { DeserializeValue(&serialData, &serialLength, &eps_); DeserializeValue(&serialData, &serialLength, &scale_); DeserializeValue(&serialData, &serialLength, &bias_); + + platform::dynload::cudnnCreate(&handle_); + platform::dynload::cudnnCreateTensorDescriptor(&x_desc_); + platform::dynload::cudnnCreateTensorDescriptor(&y_desc_); + platform::dynload::cudnnCreateTensorDescriptor(&b_desc_); } - ~InstanceNormPlugin() {} + ~InstanceNormPlugin() { + platform::dynload::cudnnDestroy(handle_); + platform::dynload::cudnnDestroyTensorDescriptor(x_desc_); + platform::dynload::cudnnDestroyTensorDescriptor(y_desc_); + platform::dynload::cudnnDestroyTensorDescriptor(b_desc_); + } int initialize() override; InstanceNormPlugin *clone() const override { diff --git a/paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.cu index 860f1039d5e..00182b87e98 100644 --- a/paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.cu @@ -39,6 +39,13 @@ int PReluPlugin::initialize() { return 0; } +void PReluPlugin::terminate() { + if (p_gpu_weight_) { + cudaFree(p_gpu_weight_); + p_gpu_weight_ = nullptr; + } +} + nvinfer1::Dims PReluPlugin::getOutputDimensions(int index, const nvinfer1::Dims *inputDims, int nbInputs) { diff --git a/paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.h index 3126366c5fd..a0a24e70a01 100644 --- a/paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.h +++ b/paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.h @@ -66,11 +66,14 @@ class PReluPlugin : public PluginTensorRT { DeserializeValue(&serialData, &serialLength, &prelu_mode); mode_ = std::string(prelu_mode); } - ~PReluPlugin() { cudaFree(p_gpu_weight_); } + ~PReluPlugin() {} int initialize() override; + void terminate() override; PReluPlugin* clone() const override { - return new PReluPlugin(weight_.data(), weight_.size(), mode_); + auto* ptr = new PReluPlugin(weight_.data(), weight_.size(), mode_); + ptr->p_gpu_weight_ = p_gpu_weight_; + return ptr; } const char* getPluginType() const override { return "prelu_plugin"; } @@ -100,7 +103,7 @@ class PReluPluginDynamic : public DynamicPluginTensorRT { DeserializeValue(&serialData, &serialLength, &prelu_mode); mode_ = std::string(prelu_mode); } - ~PReluPluginDynamic() { cudaFree(p_gpu_weight_); } + ~PReluPluginDynamic() {} nvinfer1::IPluginV2DynamicExt* clone() const override { auto ptr = new PReluPluginDynamic(weight_.data(), weight_.size(), mode_); ptr->p_gpu_weight_ = p_gpu_weight_; diff --git a/paddle/fluid/inference/tensorrt/plugin/skip_layernorm_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/skip_layernorm_op_plugin.cu index 6b2b93ba223..3b9eea22199 100644 --- a/paddle/fluid/inference/tensorrt/plugin/skip_layernorm_op_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/skip_layernorm_op_plugin.cu @@ -40,6 +40,17 @@ int SkipLayerNormPluginDynamic::initialize() { return 0; } +void SkipLayerNormPluginDynamic::terminate() { + if (bias_gpu_) { + cudaFree(bias_gpu_); + bias_gpu_ = nullptr; + } + if (scale_gpu_) { + cudaFree(scale_gpu_); + scale_gpu_ = nullptr; + } +} + nvinfer1::DimsExprs SkipLayerNormPluginDynamic::getOutputDimensions( int output_index, const nvinfer1::DimsExprs *inputs, int nb_inputs, nvinfer1::IExprBuilder &expr_builder) { diff --git a/paddle/fluid/inference/tensorrt/plugin/skip_layernorm_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/skip_layernorm_op_plugin.h index 563e2e119f5..0e457fdc8f4 100644 --- a/paddle/fluid/inference/tensorrt/plugin/skip_layernorm_op_plugin.h +++ b/paddle/fluid/inference/tensorrt/plugin/skip_layernorm_op_plugin.h @@ -104,13 +104,14 @@ class SkipLayerNormPluginDynamic : public DynamicPluginTensorRT { int nb_inputs) const override; void destroy() override { delete this; } + void terminate() override; private: std::vector bias_; std::vector scale_; - float* bias_gpu_; - float* scale_gpu_; + float* bias_gpu_{nullptr}; + float* scale_gpu_{nullptr}; int bias_size_; int scale_size_; diff --git a/paddle/fluid/inference/tensorrt/plugin/split_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/split_op_plugin.cu index 2f4f731d887..256aa28206a 100644 --- a/paddle/fluid/inference/tensorrt/plugin/split_op_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/split_op_plugin.cu @@ -62,6 +62,16 @@ nvinfer1::Dims SplitPlugin::getOutputDimensions( return output_dims; } +void SplitPlugin::shareData(const SplitPlugin* another) { + outer_rows_ = another->outer_rows_; + inner_cols_ = another->inner_cols_; + same_shape_ = another->same_shape_; + axis_shape_ = another->axis_shape_; + d_segment_offsets_ = another->d_segment_offsets_; + segment_offsets_ = another->segment_offsets_; + d_output_ptrs_.resize(another->d_output_ptrs_.size(), nullptr); +} + int SplitPlugin::initialize() { PADDLE_ENFORCE_LE(axis_, nvinfer1::Dims::MAX_DIMS, platform::errors::InvalidArgument( @@ -93,6 +103,9 @@ int SplitPlugin::initialize() { return 0; } +// nothing to release according to initialize +void SplitPlugin::terminate() {} + // The following part of the code refers to onnx-tensorrt // https://github.com/onnx/onnx-tensorrt/blob/master/Split.cu template diff --git a/paddle/fluid/inference/tensorrt/plugin/split_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/split_op_plugin.h index e3057f2bd18..5c47ec3a990 100644 --- a/paddle/fluid/inference/tensorrt/plugin/split_op_plugin.h +++ b/paddle/fluid/inference/tensorrt/plugin/split_op_plugin.h @@ -40,7 +40,9 @@ class SplitPlugin : public PluginTensorRT { } SplitPlugin* clone() const override { - return new SplitPlugin(axis_, output_length_, with_fp16_); + auto* ptr = new SplitPlugin(axis_, output_length_, with_fp16_); + ptr->shareData(this); + return ptr; } const char* getPluginType() const override { return "split_plugin"; } @@ -50,6 +52,7 @@ class SplitPlugin : public PluginTensorRT { int num_inputs) override; int initialize() override; + void terminate() override; int enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, cudaStream_t stream) override; @@ -75,6 +78,9 @@ class SplitPlugin : public PluginTensorRT { std::vector segment_offsets_; thrust::device_vector d_segment_offsets_; thrust::device_vector d_output_ptrs_; + + private: + void shareData(const SplitPlugin* another); }; #if IS_TRT_VERSION_GE(6000) diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt index 3cad132825c..ca5148cbd0c 100644 --- a/paddle/fluid/inference/tests/api/CMakeLists.txt +++ b/paddle/fluid/inference/tests/api/CMakeLists.txt @@ -642,7 +642,9 @@ set_tests_properties(test_analyzer_bert PROPERTIES TIMEOUT 120) set_tests_properties(test_analyzer_mobilenet_depthwise_conv PROPERTIES TIMEOUT 120) if(WITH_GPU AND TENSORRT_FOUND) set_tests_properties(trt_mobilenet_test PROPERTIES TIMEOUT 120) - set_tests_properties(test_analyzer_bfloat16_resnet50 PROPERTIES TIMEOUT 120) + if(WITH_MKLDNN) + set_tests_properties(test_analyzer_bfloat16_resnet50 PROPERTIES TIMEOUT 120) + endif() endif() if(ON_INFER OR WITH_GPU) set_tests_properties(test_analyzer_transformer_profile PROPERTIES TIMEOUT 120) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt b/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt index b667f522c09..dfec1cc7572 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt @@ -30,4 +30,6 @@ foreach(target ${TEST_INFERENCE_IR_PASSES}) endforeach() if(WITH_GPU AND TENSORRT_FOUND) set_tests_properties(test_trt_subgraph_pass PROPERTIES TIMEOUT 120) +set_tests_properties(test_trt_activation_pass PROPERTIES TIMEOUT 120) +set_tests_properties(test_trt_conv_pass PROPERTIES TIMEOUT 120) endif() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_activation_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_activation_pass.py new file mode 100644 index 00000000000..f71951497f2 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_activation_pass.py @@ -0,0 +1,228 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import shutil +import unittest +import numpy as np +from inference_pass_test import InferencePassTest +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid.core import PassVersionChecker +from paddle.fluid.core import AnalysisConfig + + +class TensorRTSubgraphPassActivationTest(InferencePassTest): + def setUpTensorRTParam(self): + self.enable_trt = True + self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam( + 1 << 30, 32, 0, AnalysisConfig.Precision.Float32, False, False) + + def setUp(self): + self.setUpTensorRTParam() + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data( + name="data", shape=[-1, 6, 64, 64], dtype="float32") + act_out = self.append_act(data) + out = fluid.layers.batch_norm(act_out, is_test=True) + self.feeds = { + "data": np.random.random([1, 6, 64, 64]).astype("float32"), + } + self.fetch_list = [out] + + def append_act(self, x): + return fluid.layers.relu(x) + + def test_check_output(self): + if core.is_compiled_with_cuda(): + use_gpu = True + if os.path.exists(self.path + "_opt_cache"): + shutil.rmtree(self.path + "_opt_cache") + if self.trt_parameters.precision == AnalysisConfig.Precision.Float32: + self.check_output_with_option(use_gpu) + else: + self.check_output_with_option(use_gpu, 1e-3) + self.assertTrue( + PassVersionChecker.IsCompatible('tensorrt_subgraph_pass')) + + +class TensorRTSubgraphPassLeakyReluTest(TensorRTSubgraphPassActivationTest): + def append_act(self, x): + return fluid.layers.leaky_relu(x) + + +class TensorRTSubgraphPassRelu6Test(TensorRTSubgraphPassActivationTest): + def append_act(self, x): + return fluid.layers.relu6(x) + + +class TensorRTSubgraphPassSoftMaxTest(TensorRTSubgraphPassActivationTest): + def append_act(self, x): + return fluid.layers.softmax(x) + + +class TensorRTSubgraphPassSigmoidTest(TensorRTSubgraphPassActivationTest): + def append_act(self, x): + return fluid.layers.sigmoid(x) + + +class TensorRTSubgraphPassHardSwishTest(TensorRTSubgraphPassActivationTest): + def append_act(self, x): + return fluid.layers.hard_swish(x) + + +class TensorRTSubgraphPassHardSigmoidTest(TensorRTSubgraphPassActivationTest): + def append_act(self, x): + return fluid.layers.hard_sigmoid(x) + + +class TensorRTSubgraphPassHardSwishPluginTest( + TensorRTSubgraphPassActivationTest): + def append_act(self, x): + return fluid.layers.hard_swish(x, threshold=4.0, scale=8.0) + + +class TensorRTSubgraphPassClipTest(TensorRTSubgraphPassActivationTest): + def append_act(self, x): + return fluid.layers.clip(x, 0, 1) + + +class TensorRTSubgraphPassTanhTest(TensorRTSubgraphPassActivationTest): + def append_act(self, x): + return fluid.layers.tanh(x) + + +class TensorRTSubgraphPassSwishTest(TensorRTSubgraphPassActivationTest): + def setUpTensorRTParam(self): + self.enable_trt = True + self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam( + 1 << 30, 32, 0, AnalysisConfig.Precision.Float32, True, False) + + def append_act(self, x): + return fluid.layers.swish(x) + + +class TensorRTSubgraphPassSwishFp16SerializeTest( + TensorRTSubgraphPassActivationTest): + def setUpTensorRTParam(self): + self.enable_trt = True + self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam( + 1 << 30, 32, 0, AnalysisConfig.Precision.Half, True, False) + + def append_act(self, x): + return fluid.layers.swish(x) + + +class TensorRTSubgraphPassDynamicSwishFp16SerializeTest( + TensorRTSubgraphPassActivationTest): + def setUpTensorRTParam(self): + self.enable_trt = True + self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam( + 1 << 30, 32, 0, AnalysisConfig.Precision.Half, True, False) + self.dynamic_shape_params = TensorRTSubgraphPassActivationTest.DynamicShapeParam( + { + 'data': [1, 6, 8, 8] + }, {'data': [1, 6, 512, 512]}, {'data': [1, 6, 256, 256]}, False) + + def append_act(self, x): + return fluid.layers.swish(x) + + +class TensorRTSubgraphPassPreluAllTest(TensorRTSubgraphPassActivationTest): + def append_act(self, x): + return fluid.layers.prelu(x, mode='all') + + +class TensorRTSubgraphPassPreluChannelTest(TensorRTSubgraphPassActivationTest): + def append_act(self, x): + return fluid.layers.prelu(x, mode='channel') + + +class TensorRTSubgraphPassPreluElementTest(TensorRTSubgraphPassActivationTest): + def append_act(self, x): + return fluid.layers.prelu(x, mode='element') + + +class TensorRTSubgraphPassGeluTest(TensorRTSubgraphPassActivationTest): + def append_act(self, x): + return fluid.layers.gelu(x) + + +class TensorRTSubgraphPassGeluDynamicTest(TensorRTSubgraphPassActivationTest): + def setUpTensorRTParam(self): + self.enable_trt = True + self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam( + 1 << 30, 32, 0, AnalysisConfig.Precision.Float32, False, False) + self.dynamic_shape_params = TensorRTSubgraphPassActivationTest.DynamicShapeParam( + { + 'data': [1, 6, 8, 8] + }, {'data': [1, 6, 512, 512]}, {'data': [1, 6, 256, 256]}, False) + + def append_act(self, x): + return fluid.layers.gelu(x) + + +class TensorRTSubgraphPassGeluFp16Test(TensorRTSubgraphPassActivationTest): + def setUpTensorRTParam(self): + self.enable_trt = True + self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam( + 1 << 30, 32, 0, AnalysisConfig.Precision.Half, False, False) + + def append_act(self, x): + return fluid.layers.gelu(x) + + +class TensorRTSubgraphPassGeluFp16SerializeTest( + TensorRTSubgraphPassActivationTest): + def setUpTensorRTParam(self): + self.enable_trt = True + self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam( + 1 << 30, 32, 0, AnalysisConfig.Precision.Half, True, False) + + def append_act(self, x): + return fluid.layers.gelu(x) + + +class TensorRTSubgraphPassGeluFp16DynamicTest( + TensorRTSubgraphPassActivationTest): + def setUpTensorRTParam(self): + self.enable_trt = True + self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam( + 1 << 30, 32, 0, AnalysisConfig.Precision.Half, False, False) + self.dynamic_shape_params = TensorRTSubgraphPassActivationTest.DynamicShapeParam( + { + 'data': [1, 6, 8, 8] + }, {'data': [1, 6, 512, 512]}, {'data': [1, 6, 256, 256]}, False) + + def append_act(self, x): + return fluid.layers.gelu(x) + + +class TensorRTSubgraphPassGeluFp16DynamicSerializeTest( + TensorRTSubgraphPassActivationTest): + def setUpTensorRTParam(self): + self.enable_trt = True + self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam( + 1 << 30, 32, 0, AnalysisConfig.Precision.Half, True, False) + self.dynamic_shape_params = TensorRTSubgraphPassActivationTest.DynamicShapeParam( + { + 'data': [1, 6, 8, 8] + }, {'data': [1, 6, 512, 512]}, {'data': [1, 6, 256, 256]}, False) + + def append_act(self, x): + return fluid.layers.gelu(x) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_conv_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_conv_pass.py new file mode 100644 index 00000000000..0de37fce0ae --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_conv_pass.py @@ -0,0 +1,155 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import shutil +import unittest +import numpy as np +from inference_pass_test import InferencePassTest +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid.core import PassVersionChecker +from paddle.fluid.core import AnalysisConfig + + +class TensorRTSubgraphPassConvTest(InferencePassTest): + def setUp(self): + self.set_params() + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data( + name="data", shape=[-1, 6, 64, 64], dtype="float32") + conv_out = fluid.layers.conv2d( + input=data, + num_filters=self.conv_num_filters, + filter_size=self.conv_filter_size, + groups=self.conv_groups, + padding=self.conv_padding, + bias_attr=False, + act=None) + self.feeds = { + "data": np.random.random([1, 6, 64, 64]).astype("float32"), + } + self.enable_trt = True + self.trt_parameters = TensorRTSubgraphPassConvTest.TensorRTParam( + 1 << 30, 32, 0, AnalysisConfig.Precision.Float32, False, False) + self.fetch_list = [conv_out] + + def set_params(self): + self.conv_num_filters = 6 + self.conv_filter_size = 6 + self.conv_groups = 3 + self.conv_padding = [1, 1] + + def test_check_output(self): + if core.is_compiled_with_cuda(): + use_gpu = True + self.check_output_with_option(use_gpu) + self.assertTrue( + PassVersionChecker.IsCompatible('tensorrt_subgraph_pass')) + + +class TensorRTSubgraphPassConvValidPaddingTest(TensorRTSubgraphPassConvTest): + def set_params(self): + self.conv_num_filters = 6 + self.conv_filter_size = 6 + self.conv_groups = 3 + self.conv_padding = 'VALID' + + +''' +# conv2d padded in 'SAME' mode is not yet supported in TRT, reopen this when support is complete. +class TensorRTSubgraphPassConvSamePaddingTest(InferencePassTest): + def set_params(self): + self.conv_num_filters = 6 + self.conv_filter_size = 6 + self.conv_groups = 3 + self.conv_padding = 'SAME' +''' + + +class TensorRTSubgraphPassDepthwiseConvTest(TensorRTSubgraphPassConvTest): + def set_params(self): + self.conv_num_filters = 6 + self.conv_filter_size = 6 + self.conv_groups = 6 + self.conv_padding = [1, 1] + + +class TensorRTSubgraphPassConvTransposeTest(InferencePassTest): + def setUp(self): + self.set_params() + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data( + name="data", shape=[-1, 6, 64, 64], dtype="float32") + conv_out = fluid.layers.conv2d_transpose( + input=data, + num_filters=self.conv_num_filters, + filter_size=self.conv_filter_size, + groups=self.conv_groups, + padding=self.conv_padding, + bias_attr=False, + act=None) + self.feeds = { + "data": np.random.random([1, 6, 64, 64]).astype("float32"), + } + self.enable_trt = True + self.trt_parameters = TensorRTSubgraphPassConvTransposeTest.TensorRTParam( + 1 << 30, 32, 0, AnalysisConfig.Precision.Float32, False, False) + self.fetch_list = [conv_out] + + def set_params(self): + self.conv_num_filters = 6 + self.conv_filter_size = 6 + self.conv_groups = 1 + self.conv_padding = [1, 1] + + def test_check_output(self): + if core.is_compiled_with_cuda(): + use_gpu = True + self.check_output_with_option(use_gpu) + self.assertTrue( + PassVersionChecker.IsCompatible('tensorrt_subgraph_pass')) + + +class TensorRTSubgraphPassConvTransposeValidPaddingTest( + TensorRTSubgraphPassConvTransposeTest): + def set_params(self): + self.conv_num_filters = 6 + self.conv_filter_size = 6 + self.conv_groups = 1 + self.conv_padding = 'VALID' + + +''' +# conv2d_transpose padded in 'SAME' mode is not yet supported in TRT, reopen this when support is complete. +class TensorRTSubgraphPassConvTransposeSamePaddingTest(TensorRTSubgraphPassConvTransposeTest): + def set_params(self): + self.conv_num_filters = 6 + self.conv_filter_size = 6 + self.conv_groups = 1 + self.conv_padding = 'SAME' +''' + + +class TensorRTSubgraphPassDepthwiseConvTransposeTest( + TensorRTSubgraphPassConvTransposeTest): + def set_params(self): + self.conv_num_filters = 6 + self.conv_filter_size = 6 + self.conv_groups = 1 + self.conv_padding = [1, 1] + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_subgraph_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_subgraph_pass.py index 39e15523231..e4a7305f70f 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_subgraph_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_subgraph_pass.py @@ -23,134 +23,6 @@ from paddle.fluid.core import PassVersionChecker from paddle.fluid.core import AnalysisConfig -class TensorRTSubgraphPassConvTest(InferencePassTest): - def setUp(self): - self.set_params() - with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 6, 64, 64], dtype="float32") - conv_out = fluid.layers.conv2d( - input=data, - num_filters=self.conv_num_filters, - filter_size=self.conv_filter_size, - groups=self.conv_groups, - padding=self.conv_padding, - bias_attr=False, - act=None) - self.feeds = { - "data": np.random.random([1, 6, 64, 64]).astype("float32"), - } - self.enable_trt = True - self.trt_parameters = TensorRTSubgraphPassConvTest.TensorRTParam( - 1 << 30, 32, 0, AnalysisConfig.Precision.Float32, False, False) - self.fetch_list = [conv_out] - - def set_params(self): - self.conv_num_filters = 6 - self.conv_filter_size = 6 - self.conv_groups = 3 - self.conv_padding = [1, 1] - - def test_check_output(self): - if core.is_compiled_with_cuda(): - use_gpu = True - self.check_output_with_option(use_gpu) - self.assertTrue( - PassVersionChecker.IsCompatible('tensorrt_subgraph_pass')) - - -class TensorRTSubgraphPassConvValidPaddingTest(TensorRTSubgraphPassConvTest): - def set_params(self): - self.conv_num_filters = 6 - self.conv_filter_size = 6 - self.conv_groups = 3 - self.conv_padding = 'VALID' - - -''' -# conv2d padded in 'SAME' mode is not yet supported in TRT, reopen this when support is complete. -class TensorRTSubgraphPassConvSamePaddingTest(InferencePassTest): - def set_params(self): - self.conv_num_filters = 6 - self.conv_filter_size = 6 - self.conv_groups = 3 - self.conv_padding = 'SAME' -''' - - -class TensorRTSubgraphPassDepthwiseConvTest(TensorRTSubgraphPassConvTest): - def set_params(self): - self.conv_num_filters = 6 - self.conv_filter_size = 6 - self.conv_groups = 6 - self.conv_padding = [1, 1] - - -class TensorRTSubgraphPassConvTransposeTest(InferencePassTest): - def setUp(self): - self.set_params() - with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 6, 64, 64], dtype="float32") - conv_out = fluid.layers.conv2d_transpose( - input=data, - num_filters=self.conv_num_filters, - filter_size=self.conv_filter_size, - groups=self.conv_groups, - padding=self.conv_padding, - bias_attr=False, - act=None) - self.feeds = { - "data": np.random.random([1, 6, 64, 64]).astype("float32"), - } - self.enable_trt = True - self.trt_parameters = TensorRTSubgraphPassConvTransposeTest.TensorRTParam( - 1 << 30, 32, 0, AnalysisConfig.Precision.Float32, False, False) - self.fetch_list = [conv_out] - - def set_params(self): - self.conv_num_filters = 6 - self.conv_filter_size = 6 - self.conv_groups = 1 - self.conv_padding = [1, 1] - - def test_check_output(self): - if core.is_compiled_with_cuda(): - use_gpu = True - self.check_output_with_option(use_gpu) - self.assertTrue( - PassVersionChecker.IsCompatible('tensorrt_subgraph_pass')) - - -class TensorRTSubgraphPassConvTransposeValidPaddingTest( - TensorRTSubgraphPassConvTransposeTest): - def set_params(self): - self.conv_num_filters = 6 - self.conv_filter_size = 6 - self.conv_groups = 1 - self.conv_padding = 'VALID' - - -''' -# conv2d_transpose padded in 'SAME' mode is not yet supported in TRT, reopen this when support is complete. -class TensorRTSubgraphPassConvTransposeSamePaddingTest(TensorRTSubgraphPassConvTransposeTest): - def set_params(self): - self.conv_num_filters = 6 - self.conv_filter_size = 6 - self.conv_groups = 1 - self.conv_padding = 'SAME' -''' - - -class TensorRTSubgraphPassDepthwiseConvTransposeTest( - TensorRTSubgraphPassConvTransposeTest): - def set_params(self): - self.conv_num_filters = 6 - self.conv_filter_size = 6 - self.conv_groups = 1 - self.conv_padding = [1, 1] - - class TensorRTSubgraphPassFcTest(InferencePassTest): def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): @@ -282,207 +154,6 @@ class TensorRTSubgraphPassValidPaddingPoolTest(InferencePassTest): self.exclusive = False -class TensorRTSubgraphPassActivationTest(InferencePassTest): - def setUpTensorRTParam(self): - self.enable_trt = True - self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam( - 1 << 30, 32, 0, AnalysisConfig.Precision.Float32, False, False) - - def setUp(self): - self.setUpTensorRTParam() - with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 6, 64, 64], dtype="float32") - act_out = self.append_act(data) - out = fluid.layers.batch_norm(act_out, is_test=True) - self.feeds = { - "data": np.random.random([1, 6, 64, 64]).astype("float32"), - } - self.fetch_list = [out] - - def append_act(self, x): - return fluid.layers.relu(x) - - def test_check_output(self): - if core.is_compiled_with_cuda(): - use_gpu = True - if os.path.exists(self.path + "_opt_cache"): - shutil.rmtree(self.path + "_opt_cache") - if self.trt_parameters.precision == AnalysisConfig.Precision.Float32: - self.check_output_with_option(use_gpu) - else: - self.check_output_with_option(use_gpu, 1e-3) - self.assertTrue( - PassVersionChecker.IsCompatible('tensorrt_subgraph_pass')) - - -class TensorRTSubgraphPassLeakyReluTest(TensorRTSubgraphPassActivationTest): - def append_act(self, x): - return fluid.layers.leaky_relu(x) - - -class TensorRTSubgraphPassRelu6Test(TensorRTSubgraphPassActivationTest): - def append_act(self, x): - return fluid.layers.relu6(x) - - -class TensorRTSubgraphPassSoftMaxTest(TensorRTSubgraphPassActivationTest): - def append_act(self, x): - return fluid.layers.softmax(x) - - -class TensorRTSubgraphPassSigmoidTest(TensorRTSubgraphPassActivationTest): - def append_act(self, x): - return fluid.layers.sigmoid(x) - - -class TensorRTSubgraphPassHardSwishTest(TensorRTSubgraphPassActivationTest): - def append_act(self, x): - return fluid.layers.hard_swish(x) - - -class TensorRTSubgraphPassHardSwishPluginTest( - TensorRTSubgraphPassActivationTest): - def append_act(self, x): - return fluid.layers.hard_swish(x, threshold=4.0, scale=8.0) - - -class TensorRTSubgraphPassHardSigmoidTest(TensorRTSubgraphPassActivationTest): - def append_act(self, x): - return fluid.layers.hard_sigmoid(x) - - -class TensorRTSubgraphPassClipTest(TensorRTSubgraphPassActivationTest): - def append_act(self, x): - return fluid.layers.clip(x, 0, 1) - - -class TensorRTSubgraphPassTanhTest(TensorRTSubgraphPassActivationTest): - def append_act(self, x): - return fluid.layers.tanh(x) - - -class TensorRTSubgraphPassSwishTest(TensorRTSubgraphPassActivationTest): - def setUpTensorRTParam(self): - self.enable_trt = True - self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam( - 1 << 30, 32, 0, AnalysisConfig.Precision.Float32, True, False) - - def append_act(self, x): - return fluid.layers.swish(x) - - -class TensorRTSubgraphPassSwishFp16SerializeTest( - TensorRTSubgraphPassActivationTest): - def setUpTensorRTParam(self): - self.enable_trt = True - self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam( - 1 << 30, 32, 0, AnalysisConfig.Precision.Half, True, False) - - def append_act(self, x): - return fluid.layers.swish(x) - - -class TensorRTSubgraphPassDynamicSwishFp16SerializeTest( - TensorRTSubgraphPassActivationTest): - def setUpTensorRTParam(self): - self.enable_trt = True - self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam( - 1 << 30, 32, 0, AnalysisConfig.Precision.Half, True, False) - self.dynamic_shape_params = TensorRTSubgraphPassActivationTest.DynamicShapeParam( - { - 'data': [1, 6, 8, 8] - }, {'data': [1, 6, 512, 512]}, {'data': [1, 6, 256, 256]}, False) - - def append_act(self, x): - return fluid.layers.swish(x) - - -class TensorRTSubgraphPassPreluAllTest(TensorRTSubgraphPassActivationTest): - def append_act(self, x): - return fluid.layers.prelu(x, mode='all') - - -class TensorRTSubgraphPassPreluChannelTest(TensorRTSubgraphPassActivationTest): - def append_act(self, x): - return fluid.layers.prelu(x, mode='channel') - - -class TensorRTSubgraphPassPreluElementTest(TensorRTSubgraphPassActivationTest): - def append_act(self, x): - return fluid.layers.prelu(x, mode='element') - - -class TensorRTSubgraphPassGeluTest(TensorRTSubgraphPassActivationTest): - def append_act(self, x): - return fluid.layers.gelu(x) - - -class TensorRTSubgraphPassGeluDynamicTest(TensorRTSubgraphPassActivationTest): - def setUpTensorRTParam(self): - self.enable_trt = True - self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam( - 1 << 30, 32, 0, AnalysisConfig.Precision.Float32, False, False) - self.dynamic_shape_params = TensorRTSubgraphPassActivationTest.DynamicShapeParam( - { - 'data': [1, 6, 8, 8] - }, {'data': [1, 6, 512, 512]}, {'data': [1, 6, 256, 256]}, False) - - def append_act(self, x): - return fluid.layers.gelu(x) - - -class TensorRTSubgraphPassGeluFp16Test(TensorRTSubgraphPassActivationTest): - def setUpTensorRTParam(self): - self.enable_trt = True - self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam( - 1 << 30, 32, 0, AnalysisConfig.Precision.Half, False, False) - - def append_act(self, x): - return fluid.layers.gelu(x) - - -class TensorRTSubgraphPassGeluFp16SerializeTest( - TensorRTSubgraphPassActivationTest): - def setUpTensorRTParam(self): - self.enable_trt = True - self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam( - 1 << 30, 32, 0, AnalysisConfig.Precision.Half, True, False) - - def append_act(self, x): - return fluid.layers.gelu(x) - - -class TensorRTSubgraphPassGeluFp16DynamicTest( - TensorRTSubgraphPassActivationTest): - def setUpTensorRTParam(self): - self.enable_trt = True - self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam( - 1 << 30, 32, 0, AnalysisConfig.Precision.Half, False, False) - self.dynamic_shape_params = TensorRTSubgraphPassActivationTest.DynamicShapeParam( - { - 'data': [1, 6, 8, 8] - }, {'data': [1, 6, 512, 512]}, {'data': [1, 6, 256, 256]}, False) - - def append_act(self, x): - return fluid.layers.gelu(x) - - -class TensorRTSubgraphPassGeluFp16DynamicSerializeTest( - TensorRTSubgraphPassActivationTest): - def setUpTensorRTParam(self): - self.enable_trt = True - self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam( - 1 << 30, 32, 0, AnalysisConfig.Precision.Half, True, False) - self.dynamic_shape_params = TensorRTSubgraphPassActivationTest.DynamicShapeParam( - { - 'data': [1, 6, 8, 8] - }, {'data': [1, 6, 512, 512]}, {'data': [1, 6, 256, 256]}, False) - - def append_act(self, x): - return fluid.layers.gelu(x) - - class TensorRTSubgraphPassConcatTest(InferencePassTest): def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): @@ -570,7 +241,7 @@ class TensorRTSubgraphPassDynamicSplitFp16SerializeTest(InferencePassTest): self.enable_trt = True self.trt_parameters = TensorRTSubgraphPassSplitTest.TensorRTParam( 1 << 30, 32, 0, AnalysisConfig.Precision.Half, True, False) - self.dynamic_shape_params = TensorRTSubgraphPassActivationTest.DynamicShapeParam( + self.dynamic_shape_params = TensorRTSubgraphPassDynamicSplitFp16SerializeTest.DynamicShapeParam( { 'data': [1, 3, 8, 64] }, {'data': [1, 3, 512, 64]}, {'data': [1, 3, 256, 64]}, False) diff --git a/tools/dockerfile/build_scripts/build_utils.sh b/tools/dockerfile/build_scripts/build_utils.sh index c8e0b6c3f27..d3098686594 100755 --- a/tools/dockerfile/build_scripts/build_utils.sh +++ b/tools/dockerfile/build_scripts/build_utils.sh @@ -130,6 +130,12 @@ function build_cpython { function build_cpythons { for py_ver in $@; do + if [ ${py_ver} == "2.7.15" ]; then + GET_PIP_URL="https://bootstrap.pypa.io/2.7/get-pip.py" + elif [ ${py_ver} == "3.5.1" ] ;then + GET_PIP_URL="https://bootstrap.pypa.io/3.5/get-pip.py" + fi + check_var $GET_PIP_URL curl -sLO $GET_PIP_URL build_cpython $py_ver diff --git a/tools/dockerfile/ci_dockerfile.sh b/tools/dockerfile/ci_dockerfile.sh index 2fa3d5141e5..15196e30516 100644 --- a/tools/dockerfile/ci_dockerfile.sh +++ b/tools/dockerfile/ci_dockerfile.sh @@ -41,9 +41,9 @@ function make_centos_dockerfile(){ sed "s//11.0-cudnn8-devel-centos7/g" Dockerfile.centos >${dockerfile_name} sed -i "s#COPY build_scripts /build_scripts#COPY tools/dockerfile/build_scripts ./build_scripts#g" ${dockerfile_name} dockerfile_line=$(wc -l ${dockerfile_name}|awk '{print $1}') + sed -i "${dockerfile_line}i RUN rm -f /usr/bin/cc && ln -s /usr/local/gcc-8.2/bin/gcc /usr/bin/cc" ${dockerfile_name} sed -i "${dockerfile_line}i RUN ln -s /usr/lib64/libz.so /usr/local/lib/libz.so \\ - RUN ln -s /usr/local/lib/libnccl.so /usr/local/cuda/lib64/ \\ - RUN rm -rf /usr/include/NvInfer*" ${dockerfile_name} + RUN ln -s /usr/local/lib/libnccl.so /usr/local/cuda/lib64/" ${dockerfile_name} sed -i $"${dockerfile_line}i RUN wget --no-check-certificate -q https://paddle-edl.bj.bcebos.com/hadoop-2.7.7.tar.gz \\ RUN tar -xzf hadoop-2.7.7.tar.gz && mv hadoop-2.7.7 /usr/local/" ${dockerfile_name} sed -i "s#RUN bash build_scripts/build.sh#RUN bash build_scripts/install_gcc.sh gcc82 \nRUN mv /usr/bin/cc /usr/bin/cc.bak \&\& ln -s /usr/local/gcc-8.2/bin/gcc /usr/bin/cc \nENV PATH=/usr/local/gcc-8.2/bin:\$PATH \nRUN bash build_scripts/build.sh#g" ${dockerfile_name} -- GitLab