diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 5a4541e80d12cd887a2b743a9b5f7c92c16288a2..f5cf7e5204022084e52242aa6d77736f082c7341 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -942,8 +942,6 @@ USE_TRT_CONVERTER(batch_norm); USE_TRT_CONVERTER(concat); USE_TRT_CONVERTER(dropout); USE_TRT_CONVERTER(pad); -USE_TRT_CONVERTER(hard_sigmoid); -USE_TRT_CONVERTER(hard_swish); USE_TRT_CONVERTER(split); USE_TRT_CONVERTER(prelu); USE_TRT_CONVERTER(conv2d_transpose); diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt index 47bd7ce0376af3ec2fa521d1c08c1ad4304666c5..dacea1ebcb2efa5d74d8f1b37f279fb40bda6f5e 100644 --- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt +++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt @@ -3,7 +3,7 @@ nv_library(tensorrt_converter SRCS mul_op.cc conv2d_op.cc fc_op.cc pool2d_op.cc elementwise_op.cc batch_norm_op.cc activation_op.cc softmax_op.cc concat_op.cc dropout_op.cc pad_op.cc split_op.cc prelu_op.cc leaky_relu_op.cc gelu_op.cc layer_norm_op.cc multihead_matmul_op.cc - shuffle_channel_op.cc swish_op.cc instance_norm_op.cc emb_eltwise_layernorm.cc skip_layernorm.cc hard_sigmoid_op.cc hard_swish_op.cc + shuffle_channel_op.cc swish_op.cc instance_norm_op.cc emb_eltwise_layernorm.cc skip_layernorm.cc DEPS tensorrt_engine tensorrt_plugin operator scope framework_proto op_registry) nv_test(test_op_converter SRCS test_op_converter.cc DEPS diff --git a/paddle/fluid/inference/tensorrt/convert/hard_sigmoid_op.cc b/paddle/fluid/inference/tensorrt/convert/hard_sigmoid_op.cc deleted file mode 100644 index d8c42f2f7f75b5a089d5e42b9b8c3ac50575b0ec..0000000000000000000000000000000000000000 --- a/paddle/fluid/inference/tensorrt/convert/hard_sigmoid_op.cc +++ /dev/null @@ -1,49 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/inference/tensorrt/convert/op_converter.h" - -namespace paddle { -namespace inference { -namespace tensorrt { - -/* - * HardSigmoidOp, IActivationLayer in TRT. This Layer doesn't has weights. - */ -class HardSigmoidOpConverter : public OpConverter { - public: - void operator()(const framework::proto::OpDesc& op, - const framework::Scope& scope, bool test_mode) override { - VLOG(3) << "convert a fluid HardSigmoid op to tensorrt IActivationLayer " - "layer without bias"; - framework::OpDesc op_desc(op, nullptr); - // Declare inputs - auto* input = engine_->GetITensor(op_desc.Input("X")[0]); - float slope = boost::get(op_desc.GetAttr("slope")); - float offset = boost::get(op_desc.GetAttr("offset")); - auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Activation, *input, - nvinfer1::ActivationType::kHARD_SIGMOID); - layer->setAlpha(slope); - layer->setBeta(offset); - - auto output_name = op_desc.Output("Out")[0]; - RreplenishLayerAndOutput(layer, "hard_sigmoid", {output_name}, test_mode); - } -}; - -} // namespace tensorrt -} // namespace inference -} // namespace paddle - -REGISTER_TRT_OP_CONVERTER(hard_sigmoid, HardSigmoidOpConverter); diff --git a/paddle/fluid/inference/tensorrt/convert/hard_swish_op.cc b/paddle/fluid/inference/tensorrt/convert/hard_swish_op.cc deleted file mode 100644 index 809dc415c32e3abaf94c2cb1473206f4ed7d69cc..0000000000000000000000000000000000000000 --- a/paddle/fluid/inference/tensorrt/convert/hard_swish_op.cc +++ /dev/null @@ -1,72 +0,0 @@ -/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/inference/tensorrt/convert/op_converter.h" -#include "paddle/fluid/inference/tensorrt/plugin/hard_swish_op_plugin.h" - -namespace paddle { -namespace inference { -namespace tensorrt { - -/* - * HardSwish converter from fluid to tensorRT. - */ -class HardSwishOpConverter : public OpConverter { - public: - void operator()(const framework::proto::OpDesc& op, - const framework::Scope& scope, bool test_mode) override { - VLOG(4) << "convert fluid HardSwish op to tensorrt HardSwish plugin"; - - framework::OpDesc op_desc(op, nullptr); - // Declare inputs - int input_num = op_desc.Input("X").size(); - PADDLE_ENFORCE_EQ( - input_num, 1, - platform::errors::InvalidArgument( - "HardSwish op has only 1 input, but got %d", input_num)); - auto* input = engine_->GetITensor(op_desc.Input("X")[0]); - // Get output - size_t output_num = op_desc.Output("Out").size(); - PADDLE_ENFORCE_EQ( - output_num, 1, - platform::errors::InvalidArgument( - "HardSwish op has only 1 output, but got %d", output_num)); - - const float threshold = - op_desc.HasAttr("threshold") - ? boost::get(op_desc.GetAttr("threshold")) - : 6.0f; - const float scale = op_desc.HasAttr("scale") - ? boost::get(op_desc.GetAttr("scale")) - : 6.0f; - const float offset = op_desc.HasAttr("offset") - ? boost::get(op_desc.GetAttr("offset")) - : 3.0f; - - nvinfer1::ILayer* layer = nullptr; - - plugin::HardSwishPlugin* plugin = - new plugin::HardSwishPlugin(threshold, scale, offset); - layer = engine_->AddPlugin(&input, input_num, plugin); - - auto output_name = op_desc.Output("Out")[0]; - RreplenishLayerAndOutput(layer, "hard_swish", {output_name}, test_mode); - } -}; - -} // namespace tensorrt -} // namespace inference -} // namespace paddle - -REGISTER_TRT_OP_CONVERTER(hard_swish, HardSwishOpConverter); diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc index b1ebd951ae3bc49892d4a04e64039fbf9c93a09c..facd473dd396b6221fe2e362c486d5222dd561c4 100644 --- a/paddle/fluid/inference/tensorrt/op_teller.cc +++ b/paddle/fluid/inference/tensorrt/op_teller.cc @@ -54,8 +54,6 @@ struct SimpleOpTypeSetTeller : public Teller { "relu", "softmax", "sigmoid", - "hard_sigmoid", - "hard_swish", "depthwise_conv2d", "batch_norm", "concat", diff --git a/paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt b/paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt index dc3e75389e32a6b8fb3aef9620c04d8250270b9a..86edc85712ea4b171ce3353cae7b610ba7e279cd 100644 --- a/paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt +++ b/paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt @@ -3,5 +3,5 @@ nv_library(tensorrt_plugin prelu_op_plugin.cu trt_plugin_factory.cc gelu_op_plugin.cu pool_op_plugin.cu swish_op_plugin.cu layer_norm_op_plugin.cu instance_norm_op_plugin.cu emb_eltwise_layernorm_plugin.cu -qkv_to_context_plugin.cu skip_layernorm_op_plugin.cu hard_swish_op_plugin.cu +qkv_to_context_plugin.cu skip_layernorm_op_plugin.cu DEPS enforce tensorrt_engine prelu tensor bert_encoder_functor) diff --git a/paddle/fluid/inference/tensorrt/plugin/hard_swish_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/hard_swish_op_plugin.cu deleted file mode 100644 index 8b2d0ac3cf70f77f1ff9ce9a6fe2ed19fdcf9576..0000000000000000000000000000000000000000 --- a/paddle/fluid/inference/tensorrt/plugin/hard_swish_op_plugin.cu +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include "paddle/fluid/inference/tensorrt/plugin/hard_swish_op_plugin.h" -#include "paddle/fluid/inference/tensorrt/plugin/trt_plugin_factory.h" - -namespace paddle { -namespace inference { -namespace tensorrt { -namespace plugin { - -HardSwishPlugin* CreateHardSwishPluginDeserialize(const void* buffer, - size_t length) { - return new HardSwishPlugin(buffer, length); -} - -REGISTER_TRT_PLUGIN("hard_swish_plugin", CreateHardSwishPluginDeserialize); - -nvinfer1::Dims HardSwishPlugin::getOutputDimensions( - int index, const nvinfer1::Dims* in_dims, int nb_inputs) { - assert(nb_inputs == 1); - assert(index < this->getNbOutputs()); - nvinfer1::Dims const& input_dims = in_dims[0]; - nvinfer1::Dims output_dims = input_dims; - return output_dims; -} - -template -__device__ T kMax(T a, T b) { - return a > b ? a : b; -} - -template -__device__ T kMin(T a, T b) { - return a < b ? a : b; -} - -template -__global__ void hard_swish_kernel(float threshold, float scale, float offset, - int n, const T* input, T* output) { - const int idx = blockIdx.x * TPB + threadIdx.x; - if (idx < n) { - const T in = input[idx]; - output[idx] = in / scale * kMin(kMax(in + offset, 0), threshold); - } -} - -int HardSwishPlugin::enqueue(int batch_size, const void* const* inputs, - void** outputs, void*, cudaStream_t stream) { - const auto& input_dims = this->getInputDims(0); - int num = batch_size; - for (int i = 0; i < input_dims.nbDims; i++) { - num *= input_dims.d[i]; - } - float threshold = threshold_; - float scale = scale_; - float offset = offset_; - - const int block_size = 256; - const int grid_size = (num + block_size - 1) / block_size; - - const float* input = static_cast(inputs[0]); - float* output = static_cast(outputs[0]); - hard_swish_kernel<<>>( - threshold, scale, offset, num, input, output); - - return cudaGetLastError() != cudaSuccess; -} - -} // namespace plugin -} // namespace tensorrt -} // namespace inference -} // namespace paddle diff --git a/paddle/fluid/inference/tensorrt/plugin/hard_swish_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/hard_swish_op_plugin.h deleted file mode 100644 index 2e1e1d03baf7e1cb046f887f2d799a907f3586d4..0000000000000000000000000000000000000000 --- a/paddle/fluid/inference/tensorrt/plugin/hard_swish_op_plugin.h +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once -#include -#include -#include -#include -#include "paddle/fluid/inference/tensorrt/engine.h" -#include "paddle/fluid/inference/tensorrt/plugin/trt_plugin.h" - -namespace paddle { -namespace inference { -namespace tensorrt { -namespace plugin { - -class HardSwishPlugin : public PluginTensorRT { - public: - HardSwishPlugin(const float threshold, const float scale, const float offset) - : threshold_(threshold), scale_(scale), offset_(offset) {} - - // It was used for tensorrt deserialization. - // It should not be called by users. - HardSwishPlugin(void const* serialData, size_t serialLength) { - deserializeBase(serialData, serialLength); - DeserializeValue(&serialData, &serialLength, &threshold_); - DeserializeValue(&serialData, &serialLength, &scale_); - DeserializeValue(&serialData, &serialLength, &offset_); - } - - ~HardSwishPlugin() {} - HardSwishPlugin* clone() const override { - return new HardSwishPlugin(threshold_, scale_, offset_); - } - - const char* getPluginType() const override { return "hard_swish_plugin"; } - int getNbOutputs() const override { return 1; } - int initialize() override { return 0; } - nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs, - int nbInputDims) override; - int enqueue(int batchSize, const void* const* inputs, void** outputs, - void* workspace, cudaStream_t stream) override; - - protected: - float threshold_; - float scale_; - float offset_; - - size_t getSerializationSize() override { - return getBaseSerializationSize() + SerializedSize(threshold_) + - SerializedSize(scale_) + SerializedSize(offset_) + - SerializedSize(getPluginType()); - } - - // TRT will call this func to serialize the configuration of TRT - // It should not be called by users. - void serialize(void* buffer) override { - SerializeValue(&buffer, getPluginType()); - serializeBase(buffer); - SerializeValue(&buffer, threshold_); - SerializeValue(&buffer, scale_); - SerializeValue(&buffer, offset_); - } -}; - -} // namespace plugin -} // namespace tensorrt -} // namespace inference -} // namespace paddle