diff --git a/paddle/fluid/inference/analysis/analyzer.cc b/paddle/fluid/inference/analysis/analyzer.cc index 7d16364609463e9c48720e772cebee7731dfd452..0d94ccb64e024dbdef20c1cc16aab76bc5db928c 100644 --- a/paddle/fluid/inference/analysis/analyzer.cc +++ b/paddle/fluid/inference/analysis/analyzer.cc @@ -72,7 +72,7 @@ class DfgPassManagerImpl final : public DfgPassManager { auto trt_teller = [&](const Node* node) { std::unordered_set teller_set( {"elementwise_add", "mul", "conv2d", "pool2d", "relu", "softmax", - "depthwise_conv2d", "batch_norm"}); + "depthwise_conv2d", "batch_norm", "concat"}); if (!node->IsFunction()) return false; const auto* func = static_cast(node); diff --git a/paddle/fluid/inference/api/api_tensorrt_subgraph_engine.cc b/paddle/fluid/inference/api/api_tensorrt_subgraph_engine.cc index 9ac037297167fe7de29925ffe36f4d39efb65313..93de7a5209e7dc289b4b02e73ef3bb20bfc8c774 100644 --- a/paddle/fluid/inference/api/api_tensorrt_subgraph_engine.cc +++ b/paddle/fluid/inference/api/api_tensorrt_subgraph_engine.cc @@ -32,6 +32,7 @@ class TensorRTSubgraphPredictor : public NativePaddlePredictor { : NativePaddlePredictor(config), config_(config) {} bool Init(const std::shared_ptr& parent_scope) { + FLAGS_IA_enable_tensorrt_subgraph_engine = true; VLOG(3) << "Predictor::init()"; FLAGS_tensorrt_max_batch_size = config_.max_batch_size; FLAGS_tensorrt_workspace_size = config_.workspace_size; @@ -161,3 +162,4 @@ USE_TRT_CONVERTER(fc); USE_TRT_CONVERTER(pool2d); USE_TRT_CONVERTER(softmax); USE_TRT_CONVERTER(batch_norm); +USE_TRT_CONVERTER(concat); diff --git a/paddle/fluid/inference/api/api_tensorrt_subgraph_engine_tester.cc b/paddle/fluid/inference/api/api_tensorrt_subgraph_engine_tester.cc index 8f1a72316d6c146ebc9a86ced739ef088a3b4267..9e7425eddd2df07ffe897f908aad360abe42117a 100644 --- a/paddle/fluid/inference/api/api_tensorrt_subgraph_engine_tester.cc +++ b/paddle/fluid/inference/api/api_tensorrt_subgraph_engine_tester.cc @@ -37,6 +37,7 @@ void CompareTensorRTWithFluid(bool enable_tensorrt) { config1.use_gpu = true; config1.fraction_of_gpu_memory = 0.3; config1.device = 0; + config1.max_batch_size = 10; auto predictor0 = CreatePaddlePredictor(config0); diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt index 2a449eb95e3537a11962912a6a3f29e89958fbd8..9d7be2d03cf7bb12afe7e52d9630f184d689dc25 100644 --- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt +++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt @@ -1,7 +1,7 @@ # Add TRT tests nv_library(tensorrt_converter SRCS mul_op.cc conv2d_op.cc fc_op.cc pool2d_op.cc elementwise_op.cc -batch_norm_op.cc activation_op.cc softmax_op.cc +batch_norm_op.cc activation_op.cc softmax_op.cc concat_op.cc DEPS tensorrt_engine operator scope framework_proto op_registry) nv_test(test_op_converter SRCS test_op_converter.cc DEPS @@ -18,12 +18,12 @@ nv_test(test_trt_conv_op SRCS test_conv2d_op.cc conv2d_op.cc DEPS ${FLUID_CORE_MODULES} tensorrt_engine conv_op SERIAL) nv_test(test_trt_pool2d_op SRCS test_pool2d_op.cc pool2d_op.cc DEPS ${FLUID_CORE_MODULES} tensorrt_engine pool_op SERIAL) - nv_test(test_trt_elementwise_op SRCS test_elementwise_op.cc elementwise_op.cc DEPS ${FLUID_CORE_MODULES} tensorrt_engine elementwise_add_op SERIAL) - nv_test(test_trt_softmax_op SRCS test_softmax_op.cc softmax_op.cc DEPS ${FLUID_CORE_MODULES} tensorrt_engine softmax_op SERIAL) - nv_test(test_trt_batch_norm_op SRCS test_batch_norm_op.cc batch_norm_op.cc DEPS ${FLUID_CORE_MODULES} tensorrt_engine batch_norm_op SERIAL) + +nv_test(test_trt_concat_op SRCS test_concat_op.cc concat_op.cc + DEPS ${FLUID_CORE_MODULES} tensorrt_engine concat_op SERIAL) diff --git a/paddle/fluid/inference/tensorrt/convert/concat_op.cc b/paddle/fluid/inference/tensorrt/convert/concat_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..bb9627bf957b63993b2c8d23e7ec8122eb004eaf --- /dev/null +++ b/paddle/fluid/inference/tensorrt/convert/concat_op.cc @@ -0,0 +1,57 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/inference/tensorrt/convert/op_converter.h" + +namespace paddle { +namespace inference { +namespace tensorrt { + +/* + * MulOp, IMatrixMultiplyLayer in TRT. This Layer doesn't has weights. + */ +class ConcatOpConverter : public OpConverter { + public: + void operator()(const framework::proto::OpDesc& op, + const framework::Scope& scope, bool test_mode) override { + VLOG(4) << "convert a fluid mul op to tensorrt mul layer without bias"; + + framework::OpDesc op_desc(op, nullptr); + // Declare inputs + std::vector itensors; + for (auto& input_name : op_desc.Input("X")) { + itensors.push_back(engine_->GetITensor(input_name)); + } + int axis = boost::get(op_desc.GetAttr("axis")); + PADDLE_ENFORCE(axis > 0, + "The axis attr of Concat op should be large than 0 for trt"); + + auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Concatenation, itensors.data(), + itensors.size()); + axis = axis - 1; // Remove batch dim + layer->setAxis(axis); + auto output_name = op_desc.Output("Out")[0]; + engine_->SetITensor(output_name, layer->getOutput(0)); + if (test_mode) { // the test framework can not determine which is the + // output, so place the declaration inside. + engine_->DeclareOutput(output_name); + } + } +}; + +} // namespace tensorrt +} // namespace inference +} // namespace paddle + +REGISTER_TRT_OP_CONVERTER(concat, ConcatOpConverter); diff --git a/paddle/fluid/inference/tensorrt/convert/op_converter.h b/paddle/fluid/inference/tensorrt/convert/op_converter.h index 41faaf7212accaaec238062b1340e8da8fa6be33..d309d94c560f2b484fac6b6cd40cc2704d641069 100644 --- a/paddle/fluid/inference/tensorrt/convert/op_converter.h +++ b/paddle/fluid/inference/tensorrt/convert/op_converter.h @@ -79,6 +79,14 @@ class OpConverter { it = Registry::Lookup("elementwise_" + op_type + "_tensor"); } + PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]", + op_desc.Type()); + } + + if (op_desc.Type() == "depthwise_conv2d") { + it = Registry::Lookup("conv2d"); + PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]", + op_desc.Type()); } if (!it) { diff --git a/paddle/fluid/inference/tensorrt/convert/test_concat_op.cc b/paddle/fluid/inference/tensorrt/convert/test_concat_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..4f284a4db5758e072915d7fd0f16115b8a36ba8b --- /dev/null +++ b/paddle/fluid/inference/tensorrt/convert/test_concat_op.cc @@ -0,0 +1,49 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "paddle/fluid/inference/tensorrt/convert/op_converter.h" +#include "paddle/fluid/inference/tensorrt/convert/ut_helper.h" + +namespace paddle { +namespace inference { +namespace tensorrt { + +TEST(concat_op, test) { + std::unordered_set parameters({""}); + framework::Scope scope; + TRTConvertValidation validator(10, parameters, scope, 1000); + validator.DeclInputVar("concat_x1", nvinfer1::DimsCHW(10, 3, 1)); + validator.DeclInputVar("concat_x2", nvinfer1::DimsCHW(3, 3, 1)); + validator.DeclInputVar("concat_x3", nvinfer1::DimsCHW(7, 3, 1)); + validator.DeclOutputVar("concat_out", nvinfer1::DimsCHW(20, 3, 1)); + + // Prepare Op description + framework::OpDesc desc; + desc.SetType("concat"); + desc.SetInput("X", {"concat_x1", "concat_x2", "concat_x3"}); + desc.SetOutput("Out", {"concat_out"}); + + int axis = 1; + desc.SetAttr("axis", axis); + + validator.SetOp(*desc.Proto()); + + validator.Execute(5); +} + +} // namespace tensorrt +} // namespace inference +} // namespace paddle +USE_OP(concat);