diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt index be4151b54b6087c10989c093bf007ccf3006bd65..b7f7e2ee8ef590c0d0d8307de4400a8ce8ad4e7d 100644 --- a/paddle/fluid/framework/ir/CMakeLists.txt +++ b/paddle/fluid/framework/ir/CMakeLists.txt @@ -44,6 +44,7 @@ pass_library(seqconv_eltadd_relu_fuse_pass inference) pass_library(is_test_pass base) pass_library(conv_elementwise_add_act_fuse_pass inference) pass_library(conv_elementwise_add2_act_fuse_pass inference) +pass_library(conv_elementwise_add_fuse_pass inference) if(WITH_MKLDNN) pass_library(mkldnn_placement_pass base) pass_library(depthwise_conv_mkldnn_pass base) diff --git a/paddle/fluid/framework/ir/conv_elementwise_add_fuse_pass.cc b/paddle/fluid/framework/ir/conv_elementwise_add_fuse_pass.cc new file mode 100644 index 0000000000000000000000000000000000000000..476c9dbc353f865916d0065bbce653d7b7204dce --- /dev/null +++ b/paddle/fluid/framework/ir/conv_elementwise_add_fuse_pass.cc @@ -0,0 +1,91 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "paddle/fluid/framework/ir/conv_elementwise_add_fuse_pass.h" +#include "paddle/fluid/framework/ir/graph_viz_pass.h" + +namespace paddle { +namespace framework { +namespace ir { + +#define GET_IR_NODE(node__) GET_IR_NODE_FROM_SUBGRAPH(node__, node__, pattern); +#define GET_NODES \ + GET_IR_NODE(conv_op); \ + GET_IR_NODE(conv_out); \ + GET_IR_NODE(conv_filter); \ + GET_IR_NODE(elementwise_add_op); \ + GET_IR_NODE(elementwise_add_in_y); \ + GET_IR_NODE(elementwise_add_out); + +std::unique_ptr ConvElementwiseAddFusePass::ApplyImpl( + std::unique_ptr graph) const { + const std::string pattern_name = "conv_elementwise_add_fuse"; + FusePassBase::Init(pattern_name, graph.get()); + + GraphPatternDetector gpd; + auto* x = gpd.mutable_pattern() + ->NewNode("x") + ->assert_is_op_input("conv2d", "Input") + ->AsInput(); + + patterns::ConvElementwiseadd pattern(gpd.mutable_pattern(), pattern_name); + pattern(x); + + auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, + Graph* g) { + GET_NODES; + + auto base_op_desc = *conv_op->Op()->Proto(); + std::string bias_name = elementwise_add_in_y->Name(); + std::string output_name = elementwise_add_out->Name(); + + std::string act_type = "identity"; + framework::OpDesc new_op_desc(base_op_desc, nullptr); + new_op_desc.SetType("conv2d_fusion"); + new_op_desc.SetInput("Bias", {bias_name}); + new_op_desc.SetInput("ResidualData", {}); + new_op_desc.SetAttr("activation", act_type); + new_op_desc.SetOutput("Output", {output_name}); + new_op_desc.SetAttr("is_test", true); + new_op_desc.SetAttr("use_cudnn", false); + new_op_desc.Flush(); + + // Create a new node for the fused op. + auto* new_conv_op = graph->CreateOpNode(&new_op_desc); + + // Link inputs and outputs. + PADDLE_ENFORCE(subgraph.count(x)); + auto* conv_in_node = subgraph.at(x); + + IR_NODE_LINK_TO(conv_in_node, new_conv_op); // Input + IR_NODE_LINK_TO(conv_filter, new_conv_op); // Filter + IR_NODE_LINK_TO(elementwise_add_in_y, new_conv_op); // Bias + IR_NODE_LINK_TO(new_conv_op, elementwise_add_out); // Output + + // Delete the unneeded nodes. + GraphSafeRemoveNodes(graph.get(), {conv_op, conv_out, elementwise_add_op}); + }; + + gpd(graph.get(), handler); + return graph; +} + +} // namespace ir +} // namespace framework +} // namespace paddle + +REGISTER_PASS(conv_elementwise_add_fuse_pass, + paddle::framework::ir::ConvElementwiseAddFusePass); diff --git a/paddle/fluid/framework/ir/conv_elementwise_add_fuse_pass.h b/paddle/fluid/framework/ir/conv_elementwise_add_fuse_pass.h new file mode 100644 index 0000000000000000000000000000000000000000..f234603f5856a9238164f7fb0e5cc81ea9b7ed60 --- /dev/null +++ b/paddle/fluid/framework/ir/conv_elementwise_add_fuse_pass.h @@ -0,0 +1,33 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "paddle/fluid/framework/ir/fuse_pass_base.h" +#include "paddle/fluid/framework/ir/graph_pattern_detector.h" + +namespace paddle { +namespace framework { +namespace ir { + +class ConvElementwiseAddFusePass : public FusePassBase { + public: + virtual ~ConvElementwiseAddFusePass() {} + + protected: + std::unique_ptr ApplyImpl(std::unique_ptr graph) const; +}; + +} // namespace ir +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.cc b/paddle/fluid/framework/ir/graph_pattern_detector.cc index bf12d12459c59304167d9c52059a068b50de3980..13d752e5167c039ec8d9e4300b190a726bb02a63 100644 --- a/paddle/fluid/framework/ir/graph_pattern_detector.cc +++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc @@ -17,7 +17,6 @@ #include #include -#include "graph_pattern_detector.h" #include "paddle/fluid/framework/ir/graph_helper.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/ir/graph_traits.h" @@ -1210,6 +1209,33 @@ PDNode *patterns::ConvElementwiseadd2Act::operator()(PDNode *conv_in) { return act_out; } +PDNode *patterns::ConvElementwiseadd::operator()(PDNode *conv_in) { + conv_in->AsInput(); + auto conv_op = pattern->NewNode(conv_op_repr())->assert_is_op("conv2d"); + auto conv_out = pattern->NewNode(conv_out_repr()) + ->assert_is_op_output("conv2d") + ->assert_is_op_input("elementwise_add", "X") + ->AsIntermediate(); + auto conv_filter = pattern->NewNode(conv_filter_repr()) + ->assert_is_op_input("conv2d", "Filter") + ->AsInput(); + auto elementwise_add_op = pattern->NewNode(elementwise_add_op_repr()) + ->assert_is_op("elementwise_add"); + auto elementwise_add_in_y = pattern->NewNode(elementwise_add_in_y_repr()) + ->assert_is_op_input("elementwise_add", "Y") + ->AsInput(); + auto elementwise_add_out = pattern->NewNode(elementwise_add_out_repr()) + ->assert_is_op_output("elementwise_add") + ->AsOutput(); + + conv_op->LinksFrom({conv_in, conv_filter}); + conv_out->LinksFrom({conv_op}); + elementwise_add_op->LinksFrom({conv_out, elementwise_add_in_y}) + .LinksTo({elementwise_add_out}); + + return elementwise_add_out; +} + } // namespace ir } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.h b/paddle/fluid/framework/ir/graph_pattern_detector.h index 0fee2f1c1852b296b3599d4b7219a032062a1d49..eaedd9d08e0fab820481d6eaacb6e7bfc1ab6d1d 100644 --- a/paddle/fluid/framework/ir/graph_pattern_detector.h +++ b/paddle/fluid/framework/ir/graph_pattern_detector.h @@ -716,6 +716,24 @@ struct ConvElementwiseadd2Act : public PatternBase { PATTERN_DECL_NODE(act_out); }; +// Conv + ElementwiseAdd +// This pattern should be used after ConvElementwiseadd2Act or +// ConvElementwiseadd pass +struct ConvElementwiseadd : public PatternBase { + ConvElementwiseadd(PDPattern* pattern, const std::string& name_scope) + : PatternBase(pattern, name_scope, "conv_elementwiseadd") {} + + PDNode* operator()(PDNode* conv_in); + + PATTERN_DECL_NODE(conv_op); + PATTERN_DECL_NODE(conv_out); + PATTERN_DECL_NODE(conv_filter); + + PATTERN_DECL_NODE(elementwise_add_op); + PATTERN_DECL_NODE(elementwise_add_in_y); + PATTERN_DECL_NODE(elementwise_add_out); +}; + } // namespace patterns // Link two ir::Nodes from each other. diff --git a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc index 4ffe5f575c232ccfc0089cb86e28737e56b32f94..9c42b83e7add348433635b1899087324e4e370d4 100644 --- a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc +++ b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc @@ -63,7 +63,6 @@ std::unique_ptr analysis::TensorRtSubgraphPass::ApplyImpl( void TensorRtSubgraphPass::CreateTensorRTOp(framework::ir::Node *node, Graph *graph) const { auto *op_desc = node->Op(); - static int counter{0}; auto &subgraph = *Agent(node).subgraph(); PADDLE_ENFORCE(!subgraph.empty()); @@ -192,8 +191,6 @@ void TensorRtSubgraphPass::CreateTensorRTOp(framework::ir::Node *node, block_desc.Proto()->SerializeAsString()); SetAttr(op_desc->Proto(), "max_batch_size", Get("max_batch_size")); SetAttr(op_desc->Proto(), "workspace_size", Get("workspace_size")); - SetAttr(op_desc->Proto(), "engine_uniq_key", - "trt-" + std::to_string(counter++)); SetAttr(op_desc->Proto(), "parameters", ExtractParameters(graph->Nodes())); SetAttr(op_desc->Proto(), "output_name_mapping", output_mapping); } diff --git a/paddle/fluid/inference/api/paddle_pass_builder.h b/paddle/fluid/inference/api/paddle_pass_builder.h index e6e7de24783b160769e0c9f43d8f0700a035c314..40ca0d287ccde113a20abb1036af289a36f54e6c 100644 --- a/paddle/fluid/inference/api/paddle_pass_builder.h +++ b/paddle/fluid/inference/api/paddle_pass_builder.h @@ -122,6 +122,7 @@ class GpuPassStrategy : public PassStrategy { "conv_bn_fuse_pass", // "conv_elementwise_add_act_fuse_pass", // "conv_elementwise_add2_act_fuse_pass", // + "conv_elementwise_add_fuse_pass", // }); } diff --git a/paddle/fluid/inference/tensorrt/convert/op_converter.h b/paddle/fluid/inference/tensorrt/convert/op_converter.h index d61d635ed707bc455d495f2420925a3585234b5c..91670ba8ac5332fe6e83b7bff14cb1a349d7e2a2 100644 --- a/paddle/fluid/inference/tensorrt/convert/op_converter.h +++ b/paddle/fluid/inference/tensorrt/convert/op_converter.h @@ -103,6 +103,7 @@ class OpConverter { void ConvertBlock(const framework::proto::BlockDesc& block, const std::unordered_set& parameters, const framework::Scope& scope, TensorRTEngine* engine) { + std::unique_lock lk(mut_); for (int i = 0; i < block.ops_size(); i++) { const auto& op = block.ops(i); ConvertOp(op, parameters, scope, engine); @@ -125,6 +126,7 @@ class OpConverter { std::unordered_map converters_; // fluid inference scope framework::Scope* scope_{nullptr}; + std::mutex mut_; }; } // namespace tensorrt diff --git a/paddle/fluid/operators/tensorrt/CMakeLists.txt b/paddle/fluid/operators/tensorrt/CMakeLists.txt index eee0b90fbae216e804e62993313796e914fcef5a..6b551d13f1dc5cd1c82a15a8347b278e8f795c1c 100644 --- a/paddle/fluid/operators/tensorrt/CMakeLists.txt +++ b/paddle/fluid/operators/tensorrt/CMakeLists.txt @@ -1,5 +1,5 @@ op_library(tensorrt_engine_op DEPS tensorrt_engine tensorrt_converter) -file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(tensorrt_engine);\n") +file(APPEND ${pybind_file} "USE_NO_KERNEL_OP(tensorrt_engine);\n") nv_test(test_tensorrt_engine_op SRCS tensorrt_engine_op_test.cc DEPS tensorrt_engine_op analysis) diff --git a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.cc b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.cc index 3cf2ce3c7ef87dcf75548f7d9c3a55d06ed765e8..b993c55fad13e892efd51648b78704bec83bf2b4 100644 --- a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.cc +++ b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.cc @@ -21,8 +21,6 @@ namespace paddle { -DEFINE_int32(tensorrt_engine_batch_size, 1, "the batch_size of TensorRT"); - namespace operators { class TensorRTEngineOpMaker : public framework::OpProtoAndCheckerMaker { @@ -31,7 +29,6 @@ class TensorRTEngineOpMaker : public framework::OpProtoAndCheckerMaker { AddInput("Xs", "A list of inputs.").AsDuplicable(); AddOutput("Ys", "A list of outputs").AsDuplicable(); AddAttr("subgraph", "the subgraph."); - AddAttr("engine_uniq_key", "unique key for the TRT engine."); AddAttr("max_batch_size", "the maximum batch size."); AddAttr("workspace_size", "the workspace size."); AddComment("TensorRT engine operator."); @@ -50,6 +47,6 @@ class TensorRTEngineInferVarType : public framework::VarTypeInference { namespace ops = paddle::operators; REGISTER_OPERATOR(tensorrt_engine, ops::TensorRTEngineOp, - ops::TensorRTEngineOpMaker, ops::TensorRTEngineOpMaker); + ops::TensorRTEngineOpMaker); #endif // PADDLE_WITH_CUDA diff --git a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.cu.cc b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.cu.cc deleted file mode 100644 index cbe1b426f65386e722a7b02ec1fdfdf75bfd770c..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.cu.cc +++ /dev/null @@ -1,24 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/tensorrt/tensorrt_engine_op.h" - -namespace ops = paddle::operators; - -REGISTER_OP_CUDA_KERNEL( - tensorrt_engine, - ops::TensorRTEngineKernel, - ops::TensorRTEngineKernel, - ops::TensorRTEngineKernel, - ops::TensorRTEngineKernel); diff --git a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h index 5b2aad55a4e4b640c614ad7639c83b49cef3dc07..88c4f508474e66953b79fb92ff1eb0b53a539f07 100644 --- a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h +++ b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h @@ -27,8 +27,6 @@ namespace paddle { -DECLARE_int32(tensorrt_engine_batch_size); - namespace operators { using FluidDT = framework::proto::VarType_Type; @@ -49,7 +47,7 @@ TRT_DT FluidDataType2TRT(FluidDT type) { return TRT_DT::kINT32; } -nvinfer1::Dims Vec2TRT_Dims(const std::vector& shape) { +nvinfer1::Dims Vec2TRT_Dims(const std::vector &shape) { PADDLE_ENFORCE_GT(shape.size(), 1UL, "TensorRT' tensor input requires at least 2 dimensions"); PADDLE_ENFORCE_LE(shape.size(), 4UL, @@ -63,128 +61,119 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector& shape) { } // namespace // NOLINT using inference::Singleton; -using inference::tensorrt::TRT_EngineManager; +using inference::tensorrt::TensorRTEngine; + +class TensorRTEngineOp : public framework::OperatorBase { + private: + std::vector input_names_; + std::unordered_set param_names_; + mutable std::unique_ptr trt_engine_; + int max_batch_size_; + int workspace_size_; -class TensorRTEngineOp : public framework::OperatorWithKernel { public: - using framework::OperatorWithKernel::OperatorWithKernel; + TensorRTEngineOp(const std::string &type, + const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : framework::OperatorBase(type, inputs, outputs, attrs) { + input_names_ = Inputs("Xs"); + max_batch_size_ = Attr("max_batch_size"); + workspace_size_ = Attr("workspace_size"); + + auto params = Attr>("parameters"); + for (const auto ¶m : params) { + param_names_.insert(param); + } + } protected: - void InferShape(framework::InferShapeContext* ctx) const override {} - - framework::OpKernelType GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - auto input0 = ctx.Inputs("Xs").front(); - framework::OpKernelType kt = framework::OpKernelType( - ctx.scope().FindVar(input0)->GetMutable()->type(), - ctx.GetPlace()); - return kt; + void RunImpl(const framework::Scope &scope, + const platform::Place &dev_place) const override { + RunTrt(scope, dev_place); } -}; -template -class TensorRTEngineKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - auto engine_name = context.Attr("engine_uniq_key"); - int max_batch_size = context.Attr("max_batch_size"); - if (!Singleton::Global().HasEngine(engine_name)) { - Prepare(context); + void RunTrt(const framework::Scope &scope, + const platform::Place &dev_place) const { + int runtime_batch = 1; + if (trt_engine_.get() == nullptr) { + trt_engine_.reset(new TensorRTEngine( + max_batch_size_, workspace_size_, nullptr, + boost::get(dev_place).device)); + Prepare(scope, dev_place, trt_engine_.get()); } - auto* engine = Singleton::Global().Get(engine_name); - auto input_names = context.op().Inputs("Xs"); - PADDLE_ENFORCE(!input_names.empty(), "should pass more than one inputs"); - PADDLE_ENFORCE_LE(FLAGS_tensorrt_engine_batch_size, max_batch_size); + + auto *engine = trt_engine_.get(); + PADDLE_ENFORCE(!input_names_.empty(), "should pass more than one inputs"); std::vector output_maps = - context.Attr>("output_name_mapping"); + Attr>("output_name_mapping"); - auto params = context.Attr>("parameters"); - std::unordered_set parameters; - for (const auto& param : params) { - parameters.insert(param); - } // Convert input tensor from fluid to engine. - for (const auto& x : context.Inputs("Xs")) { - if (parameters.count(x)) continue; + for (const auto &x : Inputs("Xs")) { + if (param_names_.count(x)) continue; // convert input and copy to TRT engine's buffer - auto& t = inference::analysis::GetFromScope( - context.scope(), x); + auto &t = + inference::analysis::GetFromScope(scope, x); + auto t_shape = framework::vectorize(t.dims()); + runtime_batch = t_shape[0]; if (platform::is_cpu_place(t.place())) { - engine->SetInputFromCPU(x, static_cast(t.data()), + engine->SetInputFromCPU(x, static_cast(t.data()), t.memory_size()); } else { - engine->SetInputFromGPU(x, static_cast(t.data()), + engine->SetInputFromGPU(x, static_cast(t.data()), t.memory_size()); } } + + PADDLE_ENFORCE_LE(runtime_batch, max_batch_size_); // Execute the engine. - PADDLE_ENFORCE_GT(FLAGS_tensorrt_engine_batch_size, 0); - engine->Execute(FLAGS_tensorrt_engine_batch_size); + engine->Execute(runtime_batch); // Convert output tensor from engine to fluid int output_index = 0; VLOG(4) << "TensorRT Engine Op Outputs:"; - for (const auto& y : context.Outputs("Ys")) { + for (const auto &y : Outputs("Ys")) { VLOG(4) << y; // convert output and copy to fluid. - nvinfer1::ITensor* trt_t = engine->GetITensor(output_maps[output_index]); + nvinfer1::ITensor *trt_t = engine->GetITensor(output_maps[output_index]); auto dims = trt_t->getDimensions(); // Use the output ITensor's dims to reshape the Fluid Tensor. // The ITensor doesn't contain the batch size dim. std::vector ddim; - ddim.push_back(FLAGS_tensorrt_engine_batch_size); + ddim.push_back(runtime_batch); for (int i = 0; i < dims.nbDims; i++) { ddim.push_back(dims.d[i]); } - auto* fluid_v = context.scope().FindVar(y); + auto *fluid_v = scope.FindVar(y); PADDLE_ENFORCE_NOT_NULL(fluid_v, "no output variable called %s", y); - auto* fluid_t = fluid_v->GetMutable(); + auto *fluid_t = fluid_v->GetMutable(); fluid_t->Resize(framework::make_ddim(ddim)); - // TODO(Superjomn) find some way to determine which device to output the - // tensor. - // if (platform::is_cpu_place(fluid_t->place())) { // TODO(Superjomn) change this float to dtype size. - auto size = inference::analysis::AccuDims(dims.d, dims.nbDims) * - FLAGS_tensorrt_engine_batch_size; + auto size = + inference::analysis::AccuDims(dims.d, dims.nbDims) * runtime_batch; engine->GetOutputInGPU( output_maps[output_index], fluid_t->mutable_data(platform::CUDAPlace( - boost::get(context.GetPlace()).device)), + boost::get(dev_place).device)), size * sizeof(float)); - output_index += 1; } cudaStreamSynchronize(*engine->stream()); } - protected: - void Prepare(const framework::ExecutionContext& context) const { + void Prepare(const framework::Scope &scope, const platform::Place &dev_place, + TensorRTEngine *engine) const { VLOG(4) << "Prepare engine"; - // Get the ProgramDesc and pass to convert. framework::proto::BlockDesc block_desc; - block_desc.ParseFromString(context.Attr("subgraph")); - int max_batch_size = context.Attr("max_batch_size"); - int workspace_size = context.Attr("workspace_size"); - - auto params = context.Attr>("parameters"); - std::unordered_set parameters; - for (const auto& param : params) { - parameters.insert(param); - } + block_desc.ParseFromString(Attr("subgraph")); std::vector output_maps = - context.Attr>("output_name_mapping"); - - // TODO(Superjomn) replace this with a different stream - auto* engine = Singleton::Global().Create( - max_batch_size, workspace_size, nullptr /*engine hold its own stream*/, - context.Attr("engine_uniq_key"), - boost::get(context.GetPlace()).device); + Attr>("output_name_mapping"); engine->InitNetwork(); @@ -192,39 +181,33 @@ class TensorRTEngineKernel : public framework::OpKernel { VLOG(4) << "parsed var size " << block.AllVars().size(); // Add inputs VLOG(4) << "declare inputs"; - for (auto& input : context.Inputs("Xs")) { - if (parameters.count(input)) continue; + for (auto &input : Inputs("Xs")) { + if (param_names_.count(input)) continue; VLOG(4) << "declare input " << input; - auto* var = block.FindVar(input); + + auto &t = + inference::analysis::GetFromScope(scope, input); + auto t_shape = framework::vectorize(t.dims()); + + auto *var = block.FindVar(input); // TensorRT engine need to create parameters. The parameter's description // should be set in PADDLE_ENFORCE(var, "no variable called %s", input); PADDLE_ENFORCE_EQ(var->GetType(), FluidDT::VarType_Type_LOD_TENSOR, "TensorRT engine only takes LoDTensor as input"); - auto shape = var->GetShape(); - // For the special batch_size placeholder -1, drop it and pass the real - // shape of data. - // TODO(Superjomn) fix this with batch broadcast, or it can't handle - // variational batch size. - if (shape[0] == -1) { - shape[0] = FLAGS_tensorrt_engine_batch_size; - } + engine->DeclareInput( input, FluidDataType2TRT( var->Proto()->type().lod_tensor().tensor().data_type()), - Vec2TRT_Dims(shape)); + Vec2TRT_Dims(t_shape)); } - inference::Singleton::Global() - .ConvertBlock(block_desc, parameters, context.scope(), engine); + .ConvertBlock(block_desc, param_names_, scope, engine); // Add outputs - for (auto& output : output_maps) { - if (!engine->HasDeclared(output)) { - engine->DeclareOutput(output); - } + for (auto &output : output_maps) { + engine->DeclareOutput(output); } - engine->FreezeNetwork(); } }; diff --git a/paddle/fluid/operators/tensorrt/tensorrt_engine_op_test.cc b/paddle/fluid/operators/tensorrt/tensorrt_engine_op_test.cc index 56bdd6c2f2801967829f2baf889b5517a1d9d8d9..287b0edc96e5e312b0ff1725ee188ff319d44d23 100644 --- a/paddle/fluid/operators/tensorrt/tensorrt_engine_op_test.cc +++ b/paddle/fluid/operators/tensorrt/tensorrt_engine_op_test.cc @@ -24,8 +24,7 @@ limitations under the License. */ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" #include "paddle/fluid/inference/tensorrt/convert/ut_helper.h" -USE_CUDA_ONLY_OP(tensorrt_engine); - +USE_NO_KERNEL_OP(tensorrt_engine); namespace paddle { namespace operators {