diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 0d3a687c461d101b276b11dc571c5679a8217d19..181f8f6649a5d064bee40f00dae16ca588721a7f 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -1744,6 +1744,8 @@ USE_TRT_CONVERTER(flatten_contiguous_range); USE_TRT_CONVERTER(matmul); USE_TRT_CONVERTER(conv2d); USE_TRT_CONVERTER(relu); +USE_TRT_CONVERTER(exp); +USE_TRT_CONVERTER(log); USE_TRT_CONVERTER(sigmoid); USE_TRT_CONVERTER(tanh); USE_TRT_CONVERTER(fc); diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt index ec8c1b2fcd75c22a8e895f6751ad4ddec74c0124..77c31f941d7fcf4799a14a32c6998e78aaedf36b 100644 --- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt +++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt @@ -1,10 +1,38 @@ # Add TRT tests nv_library(tensorrt_converter - SRCS matmul_op.cc conv2d_op.cc fc_op.cc pool2d_op.cc elementwise_op.cc - batch_norm_op.cc activation_op.cc softmax_op.cc concat_op.cc dropout_op.cc group_norm_op.cc - pad_op.cc split_op.cc prelu_op.cc leaky_relu_op.cc gelu_op.cc layer_norm_op.cc multihead_matmul_op.cc - shuffle_channel_op.cc swish_op.cc instance_norm_op.cc stack_op.cc transpose_op.cc flatten_op.cc flatten_contiguous_range_op.cc - emb_eltwise_layernorm.cc skip_layernorm.cc scale_op.cc slice_op.cc hard_sigmoid_op.cc hard_swish_op.cc clip_op.cc + SRCS matmul_op.cc + conv2d_op.cc + fc_op.cc + pool2d_op.cc + elementwise_op.cc + batch_norm_op.cc + activation_op.cc + unary_op.cc + softmax_op.cc + concat_op.cc + dropout_op.cc + group_norm_op.cc + pad_op.cc + split_op.cc + prelu_op.cc + leaky_relu_op.cc + gelu_op.cc + layer_norm_op.cc + multihead_matmul_op.cc + shuffle_channel_op.cc + swish_op.cc + instance_norm_op.cc + stack_op.cc + transpose_op.cc + flatten_op.cc + flatten_contiguous_range_op.cc + emb_eltwise_layernorm.cc + skip_layernorm.cc + scale_op.cc + slice_op.cc + hard_sigmoid_op.cc + hard_swish_op.cc + clip_op.cc gather_op.cc anchor_generator_op.cc yolo_box_op.cc diff --git a/paddle/fluid/inference/tensorrt/convert/unary_op.cc b/paddle/fluid/inference/tensorrt/convert/unary_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..aa3d38ebe20736ba7b05e76859ef9afafa1c14df --- /dev/null +++ b/paddle/fluid/inference/tensorrt/convert/unary_op.cc @@ -0,0 +1,84 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include +#include "glog/logging.h" +#include "paddle/fluid/framework/op_desc.h" +#include "paddle/fluid/inference/tensorrt/convert/op_converter.h" +#include "paddle/fluid/inference/tensorrt/engine.h" +#include "paddle/fluid/inference/tensorrt/helper.h" +#include "paddle/fluid/platform/enforce.h" + +namespace paddle { +namespace framework { +class Scope; + +namespace proto { +class OpDesc; +} // namespace proto +} // namespace framework +} // namespace paddle + +namespace paddle { +namespace inference { +namespace tensorrt { + +class UnaryOpConverter : public OpConverter { + public: + UnaryOpConverter() {} + void operator()(const framework::proto::OpDesc& op, + const framework::Scope& scope, bool test_mode) override { + // Here the two nullptr looks strange, that's because the + // framework::OpDesc's constructor is strange. + framework::OpDesc op_desc(op, nullptr); + VLOG(3) << "convert a fluid unary op to tensorrt unary layer whose " + "type is " + << op_type_; + nvinfer1::ITensor* input_tensor = + engine_->GetITensor(op_desc.Input("X")[0]); + auto op_pair = ops.find(op_type_); + nvinfer1::IUnaryLayer* layer = + TRT_ENGINE_ADD_LAYER(engine_, Unary, *input_tensor, op_pair->second); + auto output_name = op_desc.Output("Out")[0]; + RreplenishLayerAndOutput(layer, op_type_, {output_name}, test_mode); + } + + protected: + std::string op_type_; + static const std::unordered_map ops; +}; + +const std::unordered_map + UnaryOpConverter::ops = { + {"exp", nvinfer1::UnaryOperation::kEXP}, + {"log", nvinfer1::UnaryOperation::kLOG}, +}; + +class ExpOpConverter : public UnaryOpConverter { + public: + ExpOpConverter() { op_type_ = "exp"; } +}; + +class LogOpConverter : public UnaryOpConverter { + public: + LogOpConverter() { op_type_ = "log"; } +}; + +} // namespace tensorrt +} // namespace inference +} // namespace paddle + +REGISTER_TRT_OP_CONVERTER(exp, ExpOpConverter); +REGISTER_TRT_OP_CONVERTER(log, LogOpConverter); diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc index 280a1e3708bdb8317d2f27b6c39c03fe328c3ec5..01f2a4fca9d4e9f2a1dcb9215b591e5d31f9eb1a 100644 --- a/paddle/fluid/inference/tensorrt/op_teller.cc +++ b/paddle/fluid/inference/tensorrt/op_teller.cc @@ -65,6 +65,8 @@ struct SimpleOpTypeSetTeller : public Teller { "conv2d_fusion", "pool2d", "relu", + "exp", + "log", "softmax", "sigmoid", "hard_swish", @@ -128,6 +130,8 @@ struct SimpleOpTypeSetTeller : public Teller { "conv2d_fusion", "pool2d", "relu", + "exp", + "log", "softmax", "sigmoid", "hard_swish", @@ -200,7 +204,7 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8, for (auto& teller : tellers_) { if (op_type == "relu" || op_type == "relu6" || op_type == "tanh" || - op_type == "sigmoid") { + op_type == "sigmoid" || op_type == "exp" || op_type == "log") { auto* block = desc.Block(); if (block == nullptr) { VLOG(3) << "The block desc is nullptr, we can't continue to analyze. " diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_unary.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_unary.py new file mode 100644 index 0000000000000000000000000000000000000000..2abf0a1acda67d7a13ca2f861a573e08a6764945 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_unary.py @@ -0,0 +1,132 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from trt_layer_auto_scan_test import TrtLayerAutoScanTest, SkipReasons +from program_config import TensorConfig, ProgramConfig +import unittest +import numpy as np +import paddle.inference as paddle_infer +from functools import partial +from typing import Optional, List, Callable, Dict, Any, Set + + +class TrtConvertActivationTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: + return True + + def sample_program_configs(self): + def generate_input1(dims, batch, attrs: List[Dict[str, Any]]): + if dims == 1: + return np.ones([32]).astype(np.float32) + elif dims == 2: + return np.ones([3, 32]).astype(np.float32) + elif dims == 3: + return np.ones([3, 32, 32]).astype(np.float32) + else: + return np.ones([batch, 3, 32, 32]).astype(np.float32) + + for dims in [1, 2, 3, 4]: + for batch in [1, 4]: + for op_type in ["exp", "log"]: + self.dims = dims + dics = [{}] + + ops_config = [{ + "op_type": op_type, + "op_inputs": { + "X": ["input_data"] + }, + "op_outputs": { + "Out": ["output_data"] + }, + "op_attrs": dics[0] + }] + ops = self.generate_op_config(ops_config) + + program_config = ProgramConfig( + ops=ops, + weights={}, + inputs={ + "input_data": TensorConfig(data_gen=partial( + generate_input1, dims, batch, dics)) + }, + outputs=["output_data"]) + + yield program_config + + def sample_predictor_configs( + self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): + if self.dims == 1: + self.dynamic_shape.min_input_shape = {"input_data": [1]} + self.dynamic_shape.max_input_shape = {"input_data": [64]} + self.dynamic_shape.opt_input_shape = {"input_data": [32]} + elif self.dims == 2: + self.dynamic_shape.min_input_shape = {"input_data": [1, 16]} + self.dynamic_shape.max_input_shape = {"input_data": [4, 32]} + self.dynamic_shape.opt_input_shape = {"input_data": [3, 32]} + elif self.dims == 3: + self.dynamic_shape.min_input_shape = {"input_data": [1, 16, 16]} + self.dynamic_shape.max_input_shape = {"input_data": [4, 32, 32]} + self.dynamic_shape.opt_input_shape = {"input_data": [3, 32, 32]} + else: + self.dynamic_shape.min_input_shape = { + "input_data": [1, 3, 16, 16] + } + self.dynamic_shape.max_input_shape = { + "input_data": [4, 3, 32, 32] + } + self.dynamic_shape.opt_input_shape = { + "input_data": [1, 3, 32, 32] + } + + def clear_dynamic_shape(): + self.dynamic_shape.min_input_shape = {} + self.dynamic_shape.max_input_shape = {} + self.dynamic_shape.opt_input_shape = {} + + def generate_trt_nodes_num(attrs, dynamic_shape): + if self.dims == 1: + return 0, 3 + return 1, 2 + + attrs = [ + program_config.ops[i].attrs + for i in range(len(program_config.ops)) + ] + + # for static_shape + clear_dynamic_shape() + self.trt_param.precision = paddle_infer.PrecisionType.Float32 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, False), 1e-5 + self.trt_param.precision = paddle_infer.PrecisionType.Half + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, False), 1e-5 + + # for dynamic_shape + generate_dynamic_shape(attrs) + self.trt_param.precision = paddle_infer.PrecisionType.Float32 + yield self.create_inference_config(), generate_trt_nodes_num(attrs, + True), 1e-5 + self.trt_param.precision = paddle_infer.PrecisionType.Half + yield self.create_inference_config(), generate_trt_nodes_num(attrs, + True), 1e-5 + + def test(self): + self.run_test() + + +if __name__ == "__main__": + unittest.main()