diff --git a/paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc b/paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc
index 8225840a69d7cf6ee053632791aa4227b8de4ec9..1aa12ded2611b2ca61e3f59c4f59880ef4ffaad7 100644
--- a/paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc
@@ -107,11 +107,7 @@ void AttentionLSTMFusePass::FindWhileOp(Graph* graph) const {
   gpd.mutable_pattern()->NewNode(
       [&](Node* n) { return fused_external_ops.count(n->id()); }, "while");
 
-  if (!graph->Has(kGraphvizMarkedNodeAttr)) {
-    graph->Set(kGraphvizMarkedNodeAttr, new GraphVizPass::marked_nodes_t);
-  }
-  auto& marked_nodes =
-      graph->Get<GraphVizPass::marked_nodes_t>(kGraphvizMarkedNodeAttr);
+  auto& marked_nodes = GetMarkedNodes(graph);
 
   auto handle = [&](const GraphPatternDetector::subgraph_t& subgraph,
                     Graph* g) {
diff --git a/paddle/fluid/framework/ir/graph_viz_pass.cc b/paddle/fluid/framework/ir/graph_viz_pass.cc
index eed4ab38f5be749dcd0739e0d0c2dd043a93a949..8403d9075e3bf6a9856976ad9e37f345cc777fb5 100644
--- a/paddle/fluid/framework/ir/graph_viz_pass.cc
+++ b/paddle/fluid/framework/ir/graph_viz_pass.cc
@@ -14,6 +14,8 @@ limitations under the License. */
 
 #include "paddle/fluid/framework/ir/graph_viz_pass.h"
 
+#include
+#include
 #include
 
 #include "paddle/fluid/framework/ir/graph_helper.h"
diff --git a/paddle/fluid/framework/ir/graph_viz_pass.h b/paddle/fluid/framework/ir/graph_viz_pass.h
index 118c1bc6f3c78e6ec05c897939343df83b1c3851..0d3b32a7c1a78407a7f4f5a7e830276c8247c5df 100644
--- a/paddle/fluid/framework/ir/graph_viz_pass.h
+++ b/paddle/fluid/framework/ir/graph_viz_pass.h
@@ -14,12 +14,7 @@ limitations under the License. */
 
 #pragma once
 
-#include
-#include
-#include
-#include
 #include
-#include
 
 #include "paddle/fluid/framework/ir/graph.h"
 #include "paddle/fluid/framework/ir/pass.h"
diff --git a/paddle/fluid/inference/analysis/analyzer.cc b/paddle/fluid/inference/analysis/analyzer.cc
index 4aadb34d7b354e99eb5a5717f91e55fd91d823dc..afa653dec906981254535e6e56a7770aa1fb31c0 100644
--- a/paddle/fluid/inference/analysis/analyzer.cc
+++ b/paddle/fluid/inference/analysis/analyzer.cc
@@ -15,7 +15,6 @@
 #include "paddle/fluid/inference/analysis/analyzer.h"
 
 #include
-#include
 
 #include "paddle/fluid/inference/analysis/passes/passes.h"
 #include "paddle/fluid/string/pretty_log.h"
diff --git a/paddle/fluid/inference/analysis/analyzer.h b/paddle/fluid/inference/analysis/analyzer.h
index 95a985158e6783dfa12615779e36b3283e979631..3c053283666748a33a2dfa8167fe126d8ab61219 100644
--- a/paddle/fluid/inference/analysis/analyzer.h
+++ b/paddle/fluid/inference/analysis/analyzer.h
@@ -35,12 +35,10 @@ limitations under the License. */
  * phase in the inference service.
  */
 
-#include
 #include
 
 #include "gflags/gflags.h"
 #include "paddle/fluid/inference/analysis/analysis_pass.h"
-#include "paddle/fluid/inference/analysis/flags.h"
 
 namespace paddle {
 namespace inference {
diff --git a/paddle/fluid/inference/analysis/device.h b/paddle/fluid/inference/analysis/device.h
deleted file mode 100644
index 585c9923291e5f9cb6e50dbc4bcd28c374191048..0000000000000000000000000000000000000000
--- a/paddle/fluid/inference/analysis/device.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-#pragma once
-
-namespace paddle {
-namespace inference {
-namespace analysis {
-
-enum class Device { CPU, GPU };
-
-}  // namespace analysis
-}  // namespace inference
-}  // namespace paddle
diff --git a/paddle/fluid/inference/analysis/dot_tester.cc b/paddle/fluid/inference/analysis/dot_tester.cc
deleted file mode 100644
index 0b669093a1f32c52d559c49c2396314afeb360aa..0000000000000000000000000000000000000000
--- a/paddle/fluid/inference/analysis/dot_tester.cc
+++ /dev/null
@@ -1,62 +0,0 @@
-// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include
-
-#include
-
-#include "paddle/fluid/inference/analysis/dot.h"
-
-namespace paddle {
-namespace inference {
-namespace analysis {
-
-class DotTester : public ::testing::Test {
- protected:
-  void SetUp() override {
-    std::vector<Dot::Attr> attrs({{"title", "hello"}});
-    dot.reset(new Dot(attrs));
-    dot->AddNode("a", {Dot::Attr{"shape", "box"}, Dot::Attr("color", "blue")});
-    dot->AddNode("b", {});
-    dot->AddNode("c", {});
-    dot->AddEdge("a", "b", {});
-    dot->AddEdge("b", "c", {});
-    dot->AddEdge("a", "c", {});
-  }
-
-  std::unique_ptr<Dot> dot;
-};
-
-TEST_F(DotTester, Build) {
-  auto codes = dot->Build();
-  // Output the DOT language code; the generated code is too long to compare
-  // as a string.
-  //
-  // The output is
-  //
-  // digraph G {
-  //   title="hello"
-  //   node_1
-  //   node_2
-  //   node_0[label="a" shape="box" color="blue"]
-  //   node_0->node_1
-  //   node_1->node_2
-  //   node_0->node_2
-  // } // end G
-  LOG(INFO) << '\n' << codes;
-}
-
-}  // namespace analysis
-}  // namespace inference
-}  // namespace paddle
diff --git a/paddle/fluid/inference/analysis/flags.h b/paddle/fluid/inference/analysis/flags.h
deleted file mode 100644
index ea0406128b53bfdf04ccb97d2eba2c1c236c845c..0000000000000000000000000000000000000000
--- a/paddle/fluid/inference/analysis/flags.h
+++ /dev/null
@@ -1,24 +0,0 @@
-// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include "gflags/gflags.h"
-
-// TODO(Superjomn) add a definition flag like PADDLE_WITH_TENSORRT and hide this
-// flag if not available.
-DECLARE_bool(IA_enable_tensorrt_subgraph_engine);
-DECLARE_string(IA_graphviz_log_root);
-DECLARE_string(IA_output_storage_path);
-DECLARE_bool(IA_enable_ir);
diff --git a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
index e9733a07ec8b56af5b4574915bd085eaf7f4c486..9d39b6e6118271de8f39be99e53e06a7f855b2bc 100644
--- a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
+++ b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
@@ -28,6 +28,7 @@
 #include "paddle/fluid/framework/ir/subgraph_detector.h"
 #include "paddle/fluid/framework/op_version_registry.h"
 #include "paddle/fluid/inference/analysis/helper.h"
+#include "paddle/fluid/inference/analysis/ir_passes/subgraph_util.h"
 #include "paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.h"
 #include "paddle/fluid/inference/api/helper.h"
 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
diff --git a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.h b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.h
index a79c41f6a075f19e50d7410170136bc0903a6cde..b720f2c6b9f10c34a525431bf68760bb83dfcd19 100644
--- a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.h
+++ b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.h
@@ -21,7 +21,6 @@
 
 #include "paddle/fluid/framework/ir/fuse_pass_base.h"
 #include "paddle/fluid/framework/ir/pass.h"
-#include "paddle/fluid/inference/analysis/ir_passes/subgraph_util.h"
 #include "paddle/fluid/inference/api/paddle_analysis_config.h"
 
 namespace paddle {
diff --git a/paddle/fluid/inference/analysis/passes/ir_analysis_pass.cc b/paddle/fluid/inference/analysis/passes/ir_analysis_pass.cc
index 2f9914ac54a79b105b98d7ef96e3b71254ba2dee..f93606892b6464d0269ad5f9bbe2630b20903089 100644
--- a/paddle/fluid/inference/analysis/passes/ir_analysis_pass.cc
+++ b/paddle/fluid/inference/analysis/passes/ir_analysis_pass.cc
@@ -14,16 +14,9 @@
 
 #include "paddle/fluid/inference/analysis/passes/ir_analysis_pass.h"
 
-#include
-#include
-
 #include "paddle/fluid/framework/ir/fuse_pass_base.h"
 #include "paddle/fluid/inference/analysis/ir_pass_manager.h"
 
-#ifdef PADDLE_WITH_MKLDNN
-#include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h"
-#endif
-
 namespace paddle {
 namespace inference {
 namespace analysis {
diff --git a/paddle/fluid/inference/analysis/passes/ir_analysis_pass.h b/paddle/fluid/inference/analysis/passes/ir_analysis_pass.h
index 98ee592245660533986a9893bbca603186d0fd78..fca431b5d7779838b940209099eaab0d6e3e707e 100644
--- a/paddle/fluid/inference/analysis/passes/ir_analysis_pass.h
+++ b/paddle/fluid/inference/analysis/passes/ir_analysis_pass.h
@@ -33,10 +33,6 @@ class IrAnalysisPass : public AnalysisPass {
 
   void CollectFusionStatis(Argument* argument);
 
-  void ReadCalibrationInfo(
-      Argument* argument,
-      std::unordered_map<std::string, std::vector<float>>* var_quant_scales);
-
   std::string repr() const override;
 };
diff --git a/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc b/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc
index 3f13fd639aa554a43ef3ad22a87c42c5bfa1ab9c..60d94eae7a6e03395d43bb648076bc5e3c2f6a37 100644
--- a/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc
+++ b/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc
@@ -14,7 +14,6 @@
 
 #include "paddle/fluid/inference/analysis/passes/ir_graph_build_pass.h"
 
-#include
 #include
 
 #include "paddle/fluid/framework/executor.h"
diff --git a/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc b/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc
index 54b7cd85ff2bb4eb04928103a35ae9dad68e1668..65e149925e742924ff49c03a07afabb79dd50736 100644
--- a/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc
+++ b/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc
@@ -17,15 +17,15 @@
 #include
 #include
 #include
+#include
 
-#include "paddle/fluid/framework/data_layout.h"
 #include "paddle/fluid/framework/framework.pb.h"
 #include "paddle/fluid/framework/ir/graph_helper.h"
-#include "paddle/fluid/framework/lod_tensor.h"
+#include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/framework/tensor_util.h"
-#include "paddle/fluid/platform/bfloat16.h"
 #include "paddle/fluid/platform/enforce.h"
-#include "paddle/phi/common/data_type.h"
+#include "paddle/fluid/platform/place.h"
+#include "paddle/phi/core/dense_tensor.h"
 
 DEFINE_bool(
     custom_model_save_cpu,
diff --git a/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h b/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h
index 9db17abc24d2a26caa2f0bea2a626cb7e1b5049b..ee29af1c13308b8af63fa94313ffa5747a30efd6 100644
--- a/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h
+++ b/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h
@@ -15,12 +15,9 @@
 #pragma once
 
 #include
-#include
 
 #include "paddle/fluid/framework/ir/fuse_pass_base.h"
-#include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/inference/analysis/analysis_pass.h"
-#include "paddle/fluid/platform/place.h"
 
 namespace paddle {
 namespace inference {
diff --git a/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc b/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc
old mode 100755
new mode 100644
index 40a8c5ce66a2a5b7c5f54784abdcbdc2c9e3e531..a9478cde66ded90b5ecfaae8eb7aaa47ea530acc
--- a/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc
+++ b/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc
@@ -15,7 +15,9 @@
 #include "paddle/fluid/inference/analysis/passes/memory_optimize_pass.h"
 
 #include
+#include
 #include
+#include
 
 #include "glog/logging.h"
 #include "paddle/fluid/framework/ir/graph_helper.h"
diff --git a/paddle/fluid/inference/analysis/passes/memory_optimize_pass.h b/paddle/fluid/inference/analysis/passes/memory_optimize_pass.h
index 5dcd8b1059ebc2690a38ab7e2d201161345d762d..9ce804019744c7a884a11836703ac56b1c0ba7d8 100644
--- a/paddle/fluid/inference/analysis/passes/memory_optimize_pass.h
+++ b/paddle/fluid/inference/analysis/passes/memory_optimize_pass.h
@@ -15,12 +15,9 @@
 #pragma once
 #include
 #include
-#include
 #include
-#include
 
 #include "paddle/fluid/inference/analysis/analysis_pass.h"
-#include "paddle/phi/backends/dynload/port.h"
 
 namespace paddle {
 namespace framework {
diff --git a/paddle/fluid/inference/api/CMakeLists.txt b/paddle/fluid/inference/api/CMakeLists.txt
index f07e5406a002b2f241f128141a763458efb242e4..b681e56d3b9f9e96aebf4d457bf2d3edcc87a453 100755
--- a/paddle/fluid/inference/api/CMakeLists.txt
+++ b/paddle/fluid/inference/api/CMakeLists.txt
@@ -1,18 +1,3 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
 if(APPLE)
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=pessimizing-move")
 endif()
diff --git a/paddle/fluid/inference/api/README.md b/paddle/fluid/inference/api/README.md
deleted file mode 100644
index 990b061c8f92b01a593241217b24b5b3204c9121..0000000000000000000000000000000000000000
--- a/paddle/fluid/inference/api/README.md
+++ /dev/null
@@ -1,17 +0,0 @@
-# Embed Paddle Inference in Your Application
-
-Paddle inference offers APIs in the `C` and `C++` languages.
-
-You can easily deploy a model trained by Paddle by following the steps below:
-
-1. Optimize the native model;
-2. Write some code for deployment.
-
-## The APIs
-
-All the released APIs are located in the `paddle_inference_api.h` header file.
-The stable APIs are wrapped by `namespace paddle`; the unstable APIs are protected by `namespace paddle::contrib`.
-
-## Write some code
-
-Read `paddle_inference_api.h` for more information.
diff --git a/paddle/fluid/inference/api/high_level_api.md b/paddle/fluid/inference/api/high_level_api.md
deleted file mode 100644
index ca22767a1b8895f365633541c86d24182a3268b1..0000000000000000000000000000000000000000
--- a/paddle/fluid/inference/api/high_level_api.md
+++ /dev/null
@@ -1,59 +0,0 @@
-# Inference High-level APIs
-This document describes the high-level inference APIs; one can use them to deploy a Paddle model for an application quickly.
-
-The APIs are described in `paddle_inference_api.h`, just one header file, and two libraries `libpaddle_inference.so` and `libpaddle_inference_io.so` are needed for a deployment.
-
-## PaddleTensor
-We provide the `PaddleTensor` data structure to give a general tensor interface.
-
-The definition is
-
-```c++
-struct PaddleTensor {
-  std::string name;  // variable name.
-  std::vector<int> shape;
-  PaddleBuf data;  // blob of data.
-  PaddleDType dtype;
-};
-```
-
-The data is stored in contiguous memory in a `PaddleBuf`, and a `PaddleDType` specifies the tensor's data type.
-The `name` field is used to specify the name of an input variable,
-which is important when there are multiple inputs and one needs to distinguish which variable to set.
-
-## engine
-The inference API has two different underlying engines
-
-- the native engine
-- the tensorrt engine
-
-The native engine, which consists of the native operators and framework, takes a native Paddle model
-as input, and supports any model trained by Paddle.
-
-```c++
-enum class PaddleEngineKind {
-  kNative = 0,        // Use the native Fluid facility.
-  kAutoMixedTensorRT  // Automatically mixing TensorRT with the Fluid ops.
-};
-```
-
-## PaddlePredictor and how to create one
-The main interface is `PaddlePredictor`; it has the following methods
-
-- `bool Run(const std::vector<PaddleTensor>& inputs, std::vector<PaddleTensor>* output_data)`
-  - takes inputs and outputs `output_data`.
-- `Clone` to clone a predictor from an existing one, with the model parameters shared.
-
-There is a factory method to help create a predictor, and the user takes the ownership of this object.
-
-```c++
-template <typename ConfigT>
-std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT& config);
-```
-
-By specifying the engine kind and config, one can get a specific implementation.
-
-## Reference
-
-- [paddle_inference_api.h](./paddle_inference_api.h)
-- [some demos](./demo_ci)
diff --git a/paddle/fluid/inference/api/high_level_api_cn.md b/paddle/fluid/inference/api/high_level_api_cn.md
deleted file mode 100644
index 6fb4a55f200ebc8c34de4b3266f7021e2e97c312..0000000000000000000000000000000000000000
--- a/paddle/fluid/inference/api/high_level_api_cn.md
+++ /dev/null
@@ -1,84 +0,0 @@
-# Paddle Inference API
-
-To make inference deployment simpler and more convenient, Fluid provides a set of high-level APIs that hide the differing optimized implementations underneath.
-
-The inference library contains:
-
-- the header file `paddle_inference_api.h`, which defines all the interfaces
-- the library files `libpaddle_inference.so/.a` (Linux/Mac) and `libpaddle_inference.lib/paddle_inference.dll` (Windows)
-
-The API concepts are introduced in detail below.
-
-## PaddleTensor
-
-`PaddleTensor` defines the most basic input/output data format for inference. Its definition is
-
-```c++
-struct PaddleTensor {
-  std::string name;  // variable name.
-  std::vector<int> shape;
-  PaddleBuf data;  // blob of data.
-  PaddleDType dtype;
-};
-```
-
-- `name` specifies the name of the model variable that the input data corresponds to (unused for now, but it will be enabled once arbitrary targets are supported)
-- `shape` is the shape of the tensor
-- `data` holds the data in contiguous memory inside a `PaddleBuf`, which can either wrap external data or `malloc` memory of its own; see the related definitions in the header file for details
-- `dtype` is the data type of the tensor
-
-## engine
-
-The high-level API is backed by several optimized implementations, which we call engines. There are currently two engines:
-
-- the native engine, composed of Paddle's native forward operators, which naturally supports every model trained with Paddle
-- the TensorRT mixed engine, which supports [TensorRT](https://developer.nvidia.com/tensorrt) via subgraphs; it supports all Paddle models and automatically carves parts of the computation out as subgraphs to be accelerated by TensorRT (WIP)
-
-It is implemented as
-
-```c++
-enum class PaddleEngineKind {
-  kNative = 0,        // Use the native Fluid facility.
-  kAutoMixedTensorRT  // Automatically mixing TensorRT with the Fluid ops.
-};
-```
-
-## Inference deployment workflow
-
-Overall, there are the following steps:
-
-1. create a `PaddlePredictor` with a suitable config
-2. create the input `PaddleTensor`s and pass them into the `PaddlePredictor`
-3. fetch the output `PaddleTensor`s and read the results out
-
-The following walks through a simple model in full; some detail code is elided.
-
-```c++
-#include "paddle_inference_api.h"
-
-// Create a config and adjust the relevant settings.
-paddle::NativeConfig config;
-config.model_dir = "xxx";
-config.use_gpu = false;
-// Create a native PaddlePredictor.
-auto predictor =
-    paddle::CreatePaddlePredictor<paddle::NativeConfig>(config);
-// Create the input tensor.
-int64_t data[4] = {1, 2, 3, 4};
-paddle::PaddleTensor tensor{.name = "",
-                            .shape = std::vector<int>({4, 1}),
-                            .data = paddle::PaddleBuf(data, sizeof(data)),
-                            .dtype = paddle::PaddleDType::INT64};
-// Create the output tensors; their memory can be reused.
-std::vector<paddle::PaddleTensor> outputs;
-// Run inference.
-CHECK(predictor->Run({tensor}, &outputs));
-// Fetch the outputs ...
-```
-
-At build time, just link against `libpaddle_inference.a/.so` (Linux/Mac) or `libpaddle_inference.lib/paddle_inference.dll` (Windows).
-
-## Detailed code references
-
-- [inference demos](./demo_ci)
-- [complex single-/multi-threaded examples](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/inference/api/test_api_impl.cc)
diff --git a/paddle/fluid/inference/engine.h b/paddle/fluid/inference/engine.h
deleted file mode 100644
index e29162cf5b23bacafcf2e5ef600a96ed4518c360..0000000000000000000000000000000000000000
--- a/paddle/fluid/inference/engine.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-
-#include
-
-namespace paddle {
-namespace inference {
-
-struct Buffer;
-enum class DeviceType { UNK = -1, CPU, GPU };
-
-/*
- * EngineBase is the base class of all inference engines. An inference engine
- * takes a Paddle program as input and outputs the result in fluid Tensor
- * format. It can be used to optimize the performance of computation
- * sub-blocks, for example, by breaking the original block down into
- * sub-blocks and executing each sub-block in a different engine.
- *
- * For example:
- * At inference time, most of the ResNet50 model can be put into a subgraph
- * and run on a TensorRT engine.
- *
- * There are several engines such as TensorRT and other frameworks, so
- * EngineBase is put forward to give a unified interface for all the
- * different engine implementations.
- */
-class EngineBase {
- public:
-  using DescType = ::paddle::framework::proto::BlockDesc;
-
-  // Build the model and do some preparation, for example, in TensorRT, run
-  // createInferBuilder, buildCudaEngine.
-  virtual void Build(const DescType& paddle_model) = 0;
-
-  // Execute the engine, that will run the inference network.
-  virtual void Execute(int batch_size) = 0;
-
-  virtual ~EngineBase() {}
-};  // class EngineBase
-
-struct Buffer {
-  void* buffer{nullptr};  // buffer should be allocated only once.
-  size_t max_size;        // buffer allocated space.
-  size_t size;            // data size.
-  DeviceType device{DeviceType::UNK};  // tells which device this buffer is on.
-};
-
-}  // namespace inference
-}  // namespace paddle
diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
index d09c344ca1643a50997c0c69130b8c600f78c60c..a52d6b1c39df35ed9317debd5d77c3f6560b89d1 100755
--- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
+++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
@@ -99,7 +99,6 @@ list(
   merge_layernorm_op.cc
   skip_merge_layernorm_op.cc
   generic_and_custom_plugin_creater.cc
-  fused_lookup_tables_op.cc
   lookup_table_op.cc
   elementwiseadd_transpose_op.cc
   skip_groupnorm_act_op.cc
diff --git a/paddle/fluid/inference/tensorrt/convert/fused_lookup_tables_op.cc b/paddle/fluid/inference/tensorrt/convert/fused_lookup_tables_op.cc
deleted file mode 100644
index aaeea2d725809eac1c26ec51b9c79d1f5d251d26..0000000000000000000000000000000000000000
--- a/paddle/fluid/inference/tensorrt/convert/fused_lookup_tables_op.cc
+++ /dev/null
@@ -1,115 +0,0 @@
-/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
*/ - -#include "paddle/fluid/inference/tensorrt/convert/op_converter.h" -#include "paddle/fluid/inference/tensorrt/convert/utils.h" -#include "paddle/fluid/inference/tensorrt/plugin/lookup_table.h" - -namespace paddle { -namespace inference { -namespace tensorrt { - -class FusedLookupTablesOpConverter : public OpConverter { - public: - void operator()(const framework::proto::OpDesc& op, - const framework::Scope& scope, - bool test_mode) override { - if (!engine_->with_dynamic_shape()) { - PADDLE_THROW( - platform::errors::Fatal("lookup_table_op must with dynamic shape")); - } - - framework::OpDesc op_desc(op, nullptr); - auto ids_name = op_desc.Input("Ids").front(); - auto w_name = op_desc.Input("W").front(); - auto output_name = op_desc.Output("Out").front(); - bool enable_int8 = op_desc.HasAttr("enable_int8"); - std::vector plugin_inputs; - - auto ids_dims = engine_->GetITensor(ids_name)->getDimensions(); - if (ids_dims.d[ids_dims.nbDims - 1] == 1) { - nvinfer1::Dims new_ids_dims; - new_ids_dims.nbDims = ids_dims.nbDims - 1; - for (int i = 0; i < ids_dims.nbDims - 1; i++) { - new_ids_dims.d[i] = 0; - } - auto* reshape_layer = TRT_ENGINE_ADD_LAYER( - engine_, Shuffle, *(engine_->GetITensor(ids_name))); - reshape_layer->setReshapeDimensions(new_ids_dims); - reshape_layer->setName( - ("lookup_table: Shuffle (Output: " + output_name + ")").c_str()); - plugin_inputs.push_back(reshape_layer->getOutput(0)); - } else { - plugin_inputs.push_back(engine_->GetITensor(ids_name)); - } - - TensorRTEngine::Weight weight; - auto* w_var = scope.FindVar(w_name); - auto* w_tensor = w_var->GetMutable(); - auto w_dims = w_tensor->dims(); - weight = engine_->GetTrtWeight(w_name, *w_tensor); - auto weight_size = phi::product(w_dims); - int output_fp16; - if (engine_->precision() == AnalysisConfig::Precision::kFloat32) { - output_fp16 = 0; - } else { - output_fp16 = 1; - } - - int32_t weight_width = static_cast(w_dims[1]); - - std::vector fields; - fields.emplace_back("lookup_table_weight", - weight.get().values, - GetPluginFieldType(weight.get().type), - static_cast(weight_size)); - fields.emplace_back("lookup_table_weight_width", - &weight_width, - nvinfer1::PluginFieldType::kINT32, - 1); - fields.emplace_back( - "output_fp16", &output_fp16, nvinfer1::PluginFieldType::kINT32, 1); - nvinfer1::PluginFieldCollection* plugin_ptr = - static_cast( - malloc(sizeof(*plugin_ptr) + - fields.size() * sizeof(nvinfer1::PluginField))); - plugin_ptr->nbFields = static_cast(fields.size()); - plugin_ptr->fields = fields.data(); - auto creator = - GetPluginRegistry()->getPluginCreator("LookupTablePluginDynamic", "1"); - auto plugin_obj = - creator->createPlugin("LookupTablePluginDynamic", plugin_ptr); - - auto plugin_layer = engine_->network()->addPluginV2( - plugin_inputs.data(), plugin_inputs.size(), *plugin_obj); - - plugin_layer->setName( - ("lookup_table: (Output: " + output_name + ")").c_str()); - engine_->SetITensor(output_name, plugin_layer->getOutput(0)); - free(plugin_ptr); - if (enable_int8) { - float out_scale = - PADDLE_GET_CONST(float, op_desc.GetAttr("out_threshold")); - engine_->SetTensorDynamicRange(plugin_layer->getOutput(0), out_scale); - } - } -}; - -} // namespace tensorrt -} // namespace inference -} // namespace paddle - -// NOTE(liuyuanle): We will remove the implementation here later. Ref to -// tensorrt/convert/lookup_table_op.cc. 
-// REGISTER_TRT_OP_CONVERTER(lookup_table, FusedLookupTablesOpConverter); diff --git a/paddle/fluid/inference/tensorrt/convert/prelu_op.cc b/paddle/fluid/inference/tensorrt/convert/prelu_op.cc index 80a2ac46f44dc31c858e8386b878264a99648f37..2d2a804f394fd80915d265f461d620f1333a2d38 100644 --- a/paddle/fluid/inference/tensorrt/convert/prelu_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/prelu_op.cc @@ -13,7 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" -#include "paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.h" namespace paddle { namespace inference { diff --git a/paddle/fluid/inference/tensorrt/engine.h b/paddle/fluid/inference/tensorrt/engine.h index 3f4df6058657326fe18364db7ddf28d1d7a0531b..a0d146c9b6dad415e67a7b0f542483f25e3dac45 100644 --- a/paddle/fluid/inference/tensorrt/engine.h +++ b/paddle/fluid/inference/tensorrt/engine.h @@ -31,7 +31,6 @@ limitations under the License. */ #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/inference/api/paddle_analysis_config.h" -#include "paddle/fluid/inference/engine.h" #include "paddle/fluid/inference/tensorrt/helper.h" #include "paddle/fluid/inference/tensorrt/plugin/trt_plugin.h" #include "paddle/fluid/inference/tensorrt/trt_int8_calibrator.h" diff --git a/paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt b/paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt index e72ead0bbadb6e02932f195cfbb1fb0b037158ec..b1df5a733623e0c4ae3f9040691dcd74e00f024c 100644 --- a/paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt +++ b/paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt @@ -4,7 +4,6 @@ list( trt_plugin.cc split_op_plugin.cu elementwise_op_plugin.cu - prelu_op_plugin.cu gelu_op_plugin.cu pool_op_plugin.cu swish_op_plugin.cu @@ -40,7 +39,6 @@ list( preln_groupnorm_act_op_plugin.cu elementwiseadd_transpose_op_plugin.cu generic_plugin.cu - lookup_table.cu many_emb_layernorm_plugin.cu many_emb_layernorm_kernel.cu) @@ -57,11 +55,7 @@ endif() nv_library( tensorrt_plugin SRCS ${TRT_FILES} - DEPS enforce - tensorrt_engine - prelu - tensor - bert_encoder_functor + DEPS enforce tensorrt_engine tensor bert_encoder_functor tensorrt_dynamic_shape_infermeta_factory tensorrt_plugin_arg_mapping_context) diff --git a/paddle/fluid/inference/tensorrt/plugin/lookup_table.cu b/paddle/fluid/inference/tensorrt/plugin/lookup_table.cu deleted file mode 100644 index 31d599bd2a67cdc1b72e7ad1a2a8c7e04029844e..0000000000000000000000000000000000000000 --- a/paddle/fluid/inference/tensorrt/plugin/lookup_table.cu +++ /dev/null @@ -1,348 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once -#include "paddle/fluid/inference/tensorrt/plugin/lookup_table.h" - -namespace paddle { -namespace inference { -namespace tensorrt { -namespace plugin { - -char const* PLUGINVERSION{"1"}; -char const* LOOKUPTABLEPLUGINNAME{"LookupTablePluginDynamic"}; - -template -__global__ void lookup_table_kernel(int weight_height, - int32_t const* inputIds, - T const* wordEmb, - int32_t const wordSize, - T* output) { - // 1. lookup word and token of the block - // blockIdx.x = position in the sequence - // blockIdx.y = batch - // gridDim.x = S - // gridDim.y = B - __shared__ int wordId; - int32_t const seqPos = blockIdx.x + blockIdx.y * gridDim.x; - if (threadIdx.x == 0) { - wordId = inputIds[seqPos]; - } - __syncthreads(); - - // 2. load word embeddings and add them toghether - // offset into embeddings is given by wordId * hidden_size - int32_t const woffset = wordId * weight_height; - // the output offset is given by b * (S*hidden_size) + s * hidden_size - int32_t const outOffset = seqPos * weight_height; - if (wordId >= 0 && wordId < wordSize) { - for (int it = threadIdx.x; it < weight_height; it += TPB) { - T const w(wordEmb[woffset + it]); - output[outOffset + it] = w; - } - } else { - printf( - "Error!!!!!!(LookupTablePlugin): ID cannot be lookup " - "table: ID < 0 or ID > max "); - return; - } -} - -template -int lookup_table(cudaStream_t stream, - int weight_height, - int B, - int S, - int32_t const* inputIds, - T const* wordEmb, - int32_t const wordSize, - T* output) { - constexpr int tpb = 256; - dim3 const grid(S, B, 1); - dim3 const block(tpb, 1, 1); - lookup_table_kernel<<>>( - weight_height, inputIds, wordEmb, wordSize, output); - return 0; -} - -// Static class fields initialization -nvinfer1::PluginFieldCollection LookupTablePluginDynamicCreator::mFC{}; -std::vector - LookupTablePluginDynamicCreator::mPluginAttributes; - -LookupTablePluginDynamic::LookupTablePluginDynamic( - nvinfer1::DataType const type, - void* weight_dev, - int32_t weight_size, - int32_t width) - : mType(type), - mWeightDev(weight_dev), - mWeightSize(weight_size), - mWeightWidth(width) {} - -LookupTablePluginDynamic::LookupTablePluginDynamic(void const* data, - size_t length) { - // Deserialize in the same order as serialization - deserialize_value(&data, &length, &mType); - deserialize_value(&data, &length, &mWeightSize); - deserialize_value(&data, &length, &mWeightWidth); - char const* d = static_cast(data); - cudaMalloc(&mWeightDev, mWeightSize * getElementSize(mType)); - cudaMemcpy(mWeightDev, - d, - mWeightSize * getElementSize(mType), - cudaMemcpyHostToDevice); -} - -// IPluginV2DynamicExt Methods -nvinfer1::IPluginV2DynamicExt* LookupTablePluginDynamic::clone() - const noexcept { - auto p = new LookupTablePluginDynamic( - mType, mWeightDev, mWeightSize, mWeightWidth); - p->setPluginNamespace(mNamespace.c_str()); - return p; -} - -nvinfer1::DimsExprs LookupTablePluginDynamic::getOutputDimensions( - int32_t outputIndex, - nvinfer1::DimsExprs const* inputs, - int32_t nbInputs, - nvinfer1::IExprBuilder& exprBuilder) noexcept { - nvinfer1::DimsExprs ret; - ret.nbDims = inputs[0].nbDims + 1; - for (int i = 0; i < inputs[0].nbDims; ++i) { - ret.d[i] = inputs[0].d[i]; - } - ret.d[inputs[0].nbDims] = exprBuilder.constant(mWeightWidth); - return ret; -} - -bool LookupTablePluginDynamic::supportsFormatCombination( - int32_t pos, - nvinfer1::PluginTensorDesc const* inOut, - int32_t nbInputs, - int32_t nbOutputs) noexcept { - nvinfer1::PluginTensorDesc const& desc = inOut[pos]; - if (desc.format != 
nvinfer1::TensorFormat::kLINEAR) { - return false; - } - if (pos == 0) { - return desc.type == nvinfer1::DataType::kINT32; - } - if (pos == 1) { - if (mType == nvinfer1::DataType::kFLOAT) { - return desc.type == nvinfer1::DataType::kFLOAT; - } else { - return desc.type == nvinfer1::DataType::kHALF; - } - } -} - -void LookupTablePluginDynamic::configurePlugin( - nvinfer1::DynamicPluginTensorDesc const* inputs, - int32_t nbInputs, - nvinfer1::DynamicPluginTensorDesc const* outputs, - int32_t nbOutputs) noexcept {} - -size_t LookupTablePluginDynamic::getWorkspaceSize( - nvinfer1::PluginTensorDesc const* inputs, - int32_t nbInputs, - nvinfer1::PluginTensorDesc const* outputs, - int32_t nbOutputs) const noexcept { - return 0; -} - -int32_t LookupTablePluginDynamic::enqueue( - nvinfer1::PluginTensorDesc const* inputDesc, - nvinfer1::PluginTensorDesc const* outputDesc, - void const* const* inputs, - void* const* outputs, - void* workspace, - cudaStream_t stream) noexcept { - int32_t const batchSize = inputDesc->dims.d[0]; - int32_t S; - if (inputDesc->dims.nbDims == 1) { - S = 1; - } else { - S = inputDesc->dims.d[1]; - } - int32_t mWeightHeight = mWeightSize / mWeightWidth; - int32_t status = STATUS_FAILURE; - auto const inputIds = static_cast(inputs[0]); - if (mType == nvinfer1::DataType::kFLOAT) { - auto output = static_cast(outputs[0]); - auto const Weight = static_cast(mWeightDev); - status = lookup_table(stream, - static_cast(mWeightWidth), - batchSize, - S, - inputIds, - Weight, - mWeightHeight, - output); - } else if (mType == nvinfer1::DataType::kHALF) { - auto output = static_cast(outputs[0]); - auto const Weight = static_cast(mWeightDev); - status = lookup_table(stream, - static_cast(mWeightWidth), - batchSize, - S, - inputIds, - Weight, - mWeightHeight, - output); - } - return status; -} - -// IPluginV2Ext Methods -nvinfer1::DataType LookupTablePluginDynamic::getOutputDataType( - int32_t index, - nvinfer1::DataType const* inputTypes, - int32_t nbInputs) const noexcept { - if (index == 0) { - assert(mType == nvinfer1::DataType::kHALF || - mType == nvinfer1::DataType::kFLOAT); - return mType; - } -} - -// IPluginV2 Methods -char const* LookupTablePluginDynamic::getPluginType() const noexcept { - return LOOKUPTABLEPLUGINNAME; -} - -char const* LookupTablePluginDynamic::getPluginVersion() const noexcept { - return PLUGINVERSION; -} - -int32_t LookupTablePluginDynamic::getNbOutputs() const noexcept { return 1; } - -int32_t LookupTablePluginDynamic::initialize() noexcept { return 0; } - -void LookupTablePluginDynamic::terminate() noexcept { cudaFree(mWeightDev); } - -size_t LookupTablePluginDynamic::getSerializationSize() const noexcept { - size_t const wordSize = getElementSize(mType); - return sizeof(mType) // - + sizeof(mWeightSize) // - + sizeof(mWeightWidth) // - + wordSize * mWeightSize; // -} - -void LookupTablePluginDynamic::serialize(void* buffer) const noexcept { - serialize_value(&buffer, mType); - serialize_value(&buffer, mWeightSize); - serialize_value(&buffer, mWeightWidth); - char* d = static_cast(buffer); - size_t const wordSize = getElementSize(mType); - serFromDev(&d, static_cast(mWeightDev), mWeightSize * wordSize); -} - -void LookupTablePluginDynamic::destroy() noexcept { - // This gets called when the network containing plugin is destroyed - delete this; -} - -void LookupTablePluginDynamic::setPluginNamespace( - char const* libNamespace) noexcept { - mNamespace = libNamespace; -} - -char const* LookupTablePluginDynamic::getPluginNamespace() const noexcept { - return 
mNamespace.c_str(); -} - -LookupTablePluginDynamicCreator::LookupTablePluginDynamicCreator() {} - -char const* LookupTablePluginDynamicCreator::getPluginName() const noexcept { - return LOOKUPTABLEPLUGINNAME; -} - -char const* LookupTablePluginDynamicCreator::getPluginVersion() const noexcept { - return PLUGINVERSION; -} - -nvinfer1::PluginFieldCollection const* -LookupTablePluginDynamicCreator::getFieldNames() noexcept { - return &mFC; -} - -bool initializeFields(nvinfer1::PluginFieldCollection const* fc, - nvinfer1::Weights* weight, - int32_t& mWeightWidth) { // NOLINT - bool output_fp16 = false; - for (int32_t i = 0; i < fc->nbFields; i++) { - std::string field_name(fc->fields[i].name); - if (field_name.compare("lookup_table_weight") == 0) { - weight->values = fc->fields[i].data; - weight->count = fc->fields[i].length; - weight->type = fieldTypeToDataType(fc->fields[i].type); - } - if (field_name.compare("lookup_table_weight_width") == 0) { - assert(fc->fields[i].type == nvinfer1::PluginFieldType::kINT32); - mWeightWidth = const_cast( - static_cast(fc->fields[i].data))[0]; // NOLINT - } - if (field_name.compare("output_fp16") == 0) { - assert(fc->fields[i].type == nvinfer1::PluginFieldType::kINT32); - output_fp16 = static_cast(fc->fields[i].data)[0] != 0; - } - } - return output_fp16; -} - -nvinfer1::IPluginV2* LookupTablePluginDynamicCreator::createPlugin( - char const* name, const nvinfer1::PluginFieldCollection* fc) noexcept { - nvinfer1::Weights weight; - int32_t mWeightWidth; - bool output_fp16 = initializeFields(fc, &weight, mWeightWidth); - nvinfer1::DataType type; - if (output_fp16) { - type = nvinfer1::DataType::kHALF; - } else { - type = nvinfer1::DataType::kFLOAT; - } - WeightsWithOwnership mWeight; - mWeight.convertAndCopy(weight, type); - void* cudaMem{nullptr}; - cudaMalloc(&cudaMem, getWeightsSize(mWeight, type)); - cudaMemcpy(cudaMem, - mWeight.values, - getWeightsSize(mWeight, type), - cudaMemcpyHostToDevice); - LookupTablePluginDynamic* p = - new LookupTablePluginDynamic(type, cudaMem, mWeight.count, mWeightWidth); - return p; -} - -nvinfer1::IPluginV2* LookupTablePluginDynamicCreator::deserializePlugin( - char const* name, void const* serialData, size_t serialLength) noexcept { - return new LookupTablePluginDynamic(serialData, serialLength); -} - -void LookupTablePluginDynamicCreator::setPluginNamespace( - char const* libNamespace) noexcept { - mNamespace = libNamespace; -} - -char const* LookupTablePluginDynamicCreator::getPluginNamespace() - const noexcept { - return mNamespace.c_str(); -} - -} // namespace plugin -} // namespace tensorrt -} // namespace inference -} // namespace paddle diff --git a/paddle/fluid/inference/tensorrt/plugin/lookup_table.h b/paddle/fluid/inference/tensorrt/plugin/lookup_table.h deleted file mode 100644 index 84f6282d4f239e3c4240438bb01539fe00bc3f39..0000000000000000000000000000000000000000 --- a/paddle/fluid/inference/tensorrt/plugin/lookup_table.h +++ /dev/null @@ -1,126 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include -#include "NvInferPlugin.h" -#include "NvInferRuntime.h" -#include "paddle/fluid/inference/tensorrt/plugin/common/bertCommon.h" -#include "paddle/fluid/inference/tensorrt/plugin/common/serialize.h" -#include "paddle/fluid/inference/tensorrt/plugin/trt_plugin.h" -#include "paddle/fluid/platform/enforce.h" - -namespace paddle { -namespace inference { -namespace tensorrt { -namespace plugin { - -class LookupTablePluginDynamic : public nvinfer1::IPluginV2DynamicExt { - public: - LookupTablePluginDynamic(nvinfer1::DataType const type, - void* weight_dev, - int32_t weight_size, - int32_t width); - - LookupTablePluginDynamic(void const* data, size_t length); - - // It doesn't make sense to make EmbLayerNormVarSeqlenPlugin without - // arguments, so we delete default constructor. - LookupTablePluginDynamic() = delete; - - // IPluginV2DynamicExt Methods - bool supportsFormatCombination(int32_t pos, - nvinfer1::PluginTensorDesc const* inOut, - int32_t nbInputs, - int32_t nbOutputs) noexcept override; - size_t getWorkspaceSize(nvinfer1::PluginTensorDesc const* inputs, - int32_t nbInputs, - nvinfer1::PluginTensorDesc const* outputs, - int32_t nbOutputs) const noexcept override; - - // IPluginV2Ext Methods - nvinfer1::DataType getOutputDataType( - int32_t index, - nvinfer1::DataType const* inputTypes, - int32_t nbInputs) const noexcept override; - - // IPluginV2 Methods - nvinfer1::IPluginV2DynamicExt* clone() const noexcept override; - nvinfer1::DimsExprs getOutputDimensions( - int32_t outputIndex, - const nvinfer1::DimsExprs* inputs, - int32_t nbInputs, - nvinfer1::IExprBuilder& exprBuilder) noexcept override; - void configurePlugin(nvinfer1::DynamicPluginTensorDesc const* in, - int32_t nbInputs, - nvinfer1::DynamicPluginTensorDesc const* out, - int32_t nbOutputs) noexcept override; - char const* getPluginType() const noexcept override; - int32_t getNbOutputs() const noexcept override; - size_t getSerializationSize() const noexcept override; - void serialize(void* buffer) const noexcept override; - void destroy() noexcept override; - char const* getPluginNamespace() const noexcept override; - void setPluginNamespace(char const* pluginNamespace) noexcept override; - int32_t enqueue(nvinfer1::PluginTensorDesc const* inputDesc, - nvinfer1::PluginTensorDesc const* outputDesc, - void const* const* inputs, - void* const* outputs, - void* workspace, - cudaStream_t stream) noexcept override; - - int32_t initialize() noexcept override; - void terminate() noexcept override; - char const* getPluginVersion() const noexcept override; - - protected: - std::string mNamespace; - nvinfer1::DataType mType; - void* mWeightDev{nullptr}; - int32_t mWeightSize; - int32_t mWeightWidth; -}; - -class LookupTablePluginDynamicCreator : public nvinfer1::IPluginCreator { - public: - LookupTablePluginDynamicCreator(); - - char const* getPluginName() const noexcept override; - - const nvinfer1::PluginFieldCollection* getFieldNames() noexcept override; - - void setPluginNamespace(char const* pluginNamespace) noexcept override; - - char const* getPluginNamespace() const noexcept override; - - nvinfer1::IPluginV2* createPlugin( - char const* name, - const nvinfer1::PluginFieldCollection* fc) noexcept override; - char const* getPluginVersion() const noexcept override; - nvinfer1::IPluginV2* deserializePlugin(char const* name, - void const* serialData, - size_t serialLength) noexcept override; - - protected: 
- static nvinfer1::PluginFieldCollection mFC; - static std::vector mPluginAttributes; - std::string mNamespace; -}; - -REGISTER_TRT_PLUGIN_V2(LookupTablePluginDynamicCreator); - -} // namespace plugin -} // namespace tensorrt -} // namespace inference -} // namespace paddle diff --git a/paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.cu deleted file mode 100644 index 85d9b71d34b3c6360ee98194b87e6a80a2140a84..0000000000000000000000000000000000000000 --- a/paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.cu +++ /dev/null @@ -1,224 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include - -#include -#include - -#include "glog/logging.h" -#include "paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.h" -#include "paddle/fluid/operators/math/prelu.h" - -namespace paddle { -namespace inference { -namespace tensorrt { -namespace plugin { - -int PReluPlugin::initialize() TRT_NOEXCEPT { - cudaMalloc(&p_gpu_weight_, sizeof(float) * weight_.size()); - cudaMemcpy(p_gpu_weight_, - weight_.data(), - weight_.size() * sizeof(float), - cudaMemcpyHostToDevice); - return 0; -} - -void PReluPlugin::terminate() TRT_NOEXCEPT { - if (p_gpu_weight_) { - cudaFree(p_gpu_weight_); - p_gpu_weight_ = nullptr; - } -} - -nvinfer1::Dims PReluPlugin::getOutputDimensions(int index, - const nvinfer1::Dims *inputDims, - int nbInputs) TRT_NOEXCEPT { - assert(nbInputs == 1); - assert(index < this->getNbOutputs()); - nvinfer1::Dims const &input_dims = inputDims[0]; - nvinfer1::Dims output_dims = input_dims; - return output_dims; -} - -int PReluPlugin::enqueue(int batch_size, - const void *const *inputs, -#if IS_TRT_VERSION_LT(8000) - void **outputs, - void *workspace, - cudaStream_t stream) { -#else - void *const *outputs, - void *workspace, - cudaStream_t stream) TRT_NOEXCEPT { -#endif - // input dims is CHW. 
- const auto &input_dims = this->getInputDims(0); - const float *input = reinterpret_cast(inputs[0]); - // const float *alpha = reinterpret_cast(alpha_.get().values); - const float *alpha = p_gpu_weight_; - float *const output = reinterpret_cast(outputs)[0]; - int numel = 1; - for (int i = 0; i < input_dims.nbDims; i++) { - numel *= input_dims.d[i]; - } - - if (mode_ == "channel") { - bool channel_last = data_format_ == "NHWC"; - operators::math::PreluChannelWiseDirectCUDAFunctor - prelu_channel_wise; - prelu_channel_wise(stream, - input, - alpha, - output, - input_dims.d[0], - input_dims.d[1], - channel_last, - numel); - } else if (mode_ == "element") { - operators::math::PreluElementWiseDirectCUDAFunctor - prelu_element_wise; - prelu_element_wise(stream, input, alpha, output, input_dims.d[0], numel); - } else { - operators::math::PreluScalarDirectCUDAFunctor prelu_scalar; - prelu_scalar(stream, input, alpha, output, numel); - } - return cudaGetLastError() != cudaSuccess; -} - -#if IS_TRT_VERSION_GE(6000) - -void PReluPluginDynamic::terminate() TRT_NOEXCEPT { - if (p_gpu_weight_) { - cudaFree(p_gpu_weight_); - } -} - -int PReluPluginDynamic::initialize() TRT_NOEXCEPT { - cudaMalloc(&p_gpu_weight_, sizeof(float) * weight_.size()); - cudaMemcpy(p_gpu_weight_, - weight_.data(), - weight_.size() * sizeof(float), - cudaMemcpyHostToDevice); - return 0; -} - -PReluPluginDynamic::PReluPluginDynamic(void const *serialData, - size_t serialLength) { - DeserializeValue(&serialData, &serialLength, &weight_); - const char *prelu_mode; - DeserializeValue(&serialData, &serialLength, &prelu_mode); - mode_ = std::string(prelu_mode); -} - -size_t PReluPluginDynamic::getSerializationSize() const TRT_NOEXCEPT { - return SerializedSize(mode_.c_str()) + SerializedSize(weight_); -} - -void PReluPluginDynamic::serialize(void *buffer) const TRT_NOEXCEPT { - SerializeValue(&buffer, weight_); - SerializeValue(&buffer, mode_.c_str()); -} - -nvinfer1::DimsExprs PReluPluginDynamic::getOutputDimensions( - int output_index, - const nvinfer1::DimsExprs *inputs, - int nb_inputs, - nvinfer1::IExprBuilder &expr_builder) TRT_NOEXCEPT { - return inputs[0]; -} - -bool PReluPluginDynamic::supportsFormatCombination( - int pos, - const nvinfer1::PluginTensorDesc *in_out, - int nb_inputs, - int nb_outputs) TRT_NOEXCEPT { - PADDLE_ENFORCE_NOT_NULL( - in_out, - platform::errors::InvalidArgument( - "The input of swish plugin shoule not be nullptr.")); - - PADDLE_ENFORCE_LT( - pos, - nb_inputs + nb_outputs, - platform::errors::InvalidArgument("The pos(%d) should be less than the " - "num(%d) of the input and the output.", - pos, - nb_inputs + nb_outputs)); - (in_out && pos < (nb_inputs + nb_outputs)); - - return ((in_out[pos].type == nvinfer1::DataType::kFLOAT) && - in_out[pos].format == nvinfer1::PluginFormat::kLINEAR); -} - -nvinfer1::DataType PReluPluginDynamic::getOutputDataType( - int index, - const nvinfer1::DataType *input_types, - int nb_inputs) const TRT_NOEXCEPT { - PADDLE_ENFORCE_EQ(index, - 0, - platform::errors::InvalidArgument( - "The PRelu Plugin only has one input, so the " - "index value should be 0, but get %d.", - index)); - PADDLE_ENFORCE_EQ((input_types[0] == nvinfer1::DataType::kFLOAT), - true, - platform::errors::InvalidArgument( - "The input type should be half or float")); - return input_types[0]; -} - -int PReluPluginDynamic::enqueue(const nvinfer1::PluginTensorDesc *input_desc, - const nvinfer1::PluginTensorDesc *output_desc, - const void *const *inputs, - void *const *outputs, - void *workspace, - 
cudaStream_t stream) TRT_NOEXCEPT { - auto input_dims = input_desc[0].dims; - const float *alpha = p_gpu_weight_; - const float *input = static_cast(inputs[0]); - float *output = static_cast(outputs[0]); - int numel = 1; - for (int i = 0; i < input_dims.nbDims; i++) { - numel *= input_dims.d[i]; - } - - if (mode_ == "channel") { - bool channel_last = data_format_ == "NHWC"; - operators::math::PreluChannelWiseDirectCUDAFunctor - prelu_channel_wise; - prelu_channel_wise(stream, - input, - alpha, - output, - input_dims.d[0], - input_dims.d[1], - channel_last, - numel); - } else if (mode_ == "element") { - operators::math::PreluElementWiseDirectCUDAFunctor - prelu_element_wise; - prelu_element_wise(stream, input, alpha, output, input_dims.d[0], numel); - } else { - operators::math::PreluScalarDirectCUDAFunctor prelu_scalar; - prelu_scalar(stream, input, alpha, output, numel); - } - return cudaGetLastError() != cudaSuccess; -} -#endif - -} // namespace plugin -} // namespace tensorrt -} // namespace inference -} // namespace paddle diff --git a/paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.h deleted file mode 100644 index 921f0091f333ba09167c1628991a5b6234915c27..0000000000000000000000000000000000000000 --- a/paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.h +++ /dev/null @@ -1,217 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include -#include - -#include "paddle/fluid/framework/tensor.h" -#include "paddle/fluid/framework/tensor_util.h" -#include "paddle/fluid/inference/tensorrt/engine.h" -#include "paddle/fluid/inference/tensorrt/plugin/trt_plugin.h" - -namespace paddle { -namespace inference { -namespace tensorrt { -namespace plugin { - -class PReluPlugin : public PluginTensorRT { - std::vector weight_; - float* p_gpu_weight_; - std::string mode_; - std::string data_format_; - - public: - size_t getSerializationSize() const TRT_NOEXCEPT override { - return getBaseSerializationSize() + SerializedSize(mode_.c_str()) + - SerializedSize(data_format_.c_str()) + SerializedSize(weight_); - } - - // TRT will call this func when we need to serialize the configuration of - // tensorrt. - // It should not be called by users. - void serialize(void* buffer) const TRT_NOEXCEPT override { - serializeBase(buffer); - SerializeValue(&buffer, weight_); - SerializeValue(&buffer, mode_.c_str()); - SerializeValue(&buffer, data_format_.c_str()); - } - - PReluPlugin(const float* weight, - const int weight_num, - std::string const& mode, - std::string const& data_format) - : mode_(mode), data_format_(data_format) { - weight_.resize(weight_num); - std::copy(weight, weight + weight_num, weight_.data()); - } - - // It was used for tensorrt deserialization. - // It should not be called by users. 
- PReluPlugin(void const* serialData, size_t serialLength) { - deserializeBase(serialData, serialLength); - DeserializeValue(&serialData, &serialLength, &weight_); - const char* prelu_mode; - DeserializeValue(&serialData, &serialLength, &prelu_mode); - mode_ = std::string(prelu_mode); - const char* prelu_data_format; - DeserializeValue(&serialData, &serialLength, &prelu_data_format); - data_format_ = std::string(prelu_data_format); - } - ~PReluPlugin() {} - int initialize() TRT_NOEXCEPT override; - void terminate() TRT_NOEXCEPT override; - - PReluPlugin* clone() const TRT_NOEXCEPT override { - auto* ptr = - new PReluPlugin(weight_.data(), weight_.size(), mode_, data_format_); - ptr->p_gpu_weight_ = p_gpu_weight_; - return ptr; - } - - const char* getPluginType() const TRT_NOEXCEPT override { - return "prelu_plugin"; - } - int getNbOutputs() const TRT_NOEXCEPT override { return 1; } - nvinfer1::Dims getOutputDimensions(int index, - const nvinfer1::Dims* inputs, - int nbInputDims) TRT_NOEXCEPT override; -#if IS_TRT_VERSION_LT(8000) - int enqueue(int batchSize, - const void* const* inputs, - void** outputs, -#else - int enqueue(int batchSize, - const void* const* inputs, - void* const* outputs, -#endif - void* workspace, - cudaStream_t stream) TRT_NOEXCEPT override; -}; - -class PReluPluginCreator : public TensorRTPluginCreator { - public: - const char* getPluginName() const TRT_NOEXCEPT override { - return "prelu_plugin"; - } - - const char* getPluginVersion() const TRT_NOEXCEPT override { return "1"; } - - nvinfer1::IPluginV2* deserializePlugin(const char* name, - const void* serial_data, - size_t serial_length) - TRT_NOEXCEPT override { - return new PReluPlugin(serial_data, serial_length); - } -}; -REGISTER_TRT_PLUGIN_V2(PReluPluginCreator); - -#if IS_TRT_VERSION_GE(6000) -class PReluPluginDynamic : public DynamicPluginTensorRT { - public: - PReluPluginDynamic(const float* weight, - const int weight_num, - std::string const& mode, - std::string const& data_format) - : mode_(mode), data_format_(data_format) { - weight_.resize(weight_num); - std::copy(weight, weight + weight_num, weight_.data()); - } - - PReluPluginDynamic(void const* serialData, size_t serialLength); - ~PReluPluginDynamic() {} - nvinfer1::IPluginV2DynamicExt* clone() const TRT_NOEXCEPT override { - auto ptr = new PReluPluginDynamic( - weight_.data(), weight_.size(), mode_, data_format_); - ptr->p_gpu_weight_ = p_gpu_weight_; - return ptr; - } - - const char* getPluginType() const TRT_NOEXCEPT override { - return "prelu_plugin_dynamic"; - } - int getNbOutputs() const TRT_NOEXCEPT override { return 1; } - int initialize() TRT_NOEXCEPT override; - void terminate() TRT_NOEXCEPT override; - - size_t getSerializationSize() const TRT_NOEXCEPT override; - void serialize(void* buffer) const TRT_NOEXCEPT override; - - nvinfer1::DimsExprs getOutputDimensions(int output_index, - const nvinfer1::DimsExprs* inputs, - int nb_inputs, - nvinfer1::IExprBuilder& expr_builder) - TRT_NOEXCEPT override; - - bool supportsFormatCombination(int pos, - const nvinfer1::PluginTensorDesc* inOut, - int nbInputs, - int nbOutputs) TRT_NOEXCEPT override; - - void configurePlugin(const nvinfer1::DynamicPluginTensorDesc* in, - int nbInputs, - const nvinfer1::DynamicPluginTensorDesc* out, - int nbOutputs) TRT_NOEXCEPT override {} - - size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc* inputs, - int nbInputs, - const nvinfer1::PluginTensorDesc* outputs, - int nbOutputs) const TRT_NOEXCEPT override { - return 0; - } - - int enqueue(const 
nvinfer1::PluginTensorDesc* inputDesc, - const nvinfer1::PluginTensorDesc* outputDesc, - const void* const* inputs, - void* const* outputs, - void* workspace, - cudaStream_t stream) TRT_NOEXCEPT override; - nvinfer1::DataType getOutputDataType(int index, - const nvinfer1::DataType* inputTypes, - int nbInputs) const - TRT_NOEXCEPT override; - - void destroy() TRT_NOEXCEPT override { delete this; } - - private: - std::vector weight_; - float* p_gpu_weight_; - std::string mode_; - std::string data_format_; -}; -#endif - -class PReluPluginDynamicCreator : public TensorRTPluginCreator { - public: - const char* getPluginName() const TRT_NOEXCEPT override { - return "prelu_plugin_dynamic"; - } - - const char* getPluginVersion() const TRT_NOEXCEPT override { return "1"; } - - nvinfer1::IPluginV2* deserializePlugin(const char* name, - const void* serial_data, - size_t serial_length) - TRT_NOEXCEPT override { - return new PReluPluginDynamic(serial_data, serial_length); - } -}; -REGISTER_TRT_PLUGIN_V2(PReluPluginDynamicCreator); - -} // namespace plugin -} // namespace tensorrt -} // namespace inference -} // namespace paddle
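
The `attention_lstm_fuse_pass.cc` hunk above replaces five lines of inline attribute bookkeeping with a single `GetMarkedNodes(graph)` call, but the helper's definition is not part of this patch. A minimal sketch consistent with the deleted inline code (the name `GetMarkedNodes` comes from the hunk; the rest is a hedged reconstruction, presumably living in `graph_viz_pass.h`):

```c++
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_viz_pass.h"

namespace paddle {
namespace framework {
namespace ir {

// Lazily attach the marked-node set to the graph on first use, then return it.
// This mirrors the inline logic removed from AttentionLSTMFusePass::FindWhileOp().
inline GraphVizPass::marked_nodes_t& GetMarkedNodes(Graph* graph) {
  if (!graph->Has(kGraphvizMarkedNodeAttr)) {
    graph->Set(kGraphvizMarkedNodeAttr, new GraphVizPass::marked_nodes_t);
  }
  return graph->Get<GraphVizPass::marked_nodes_t>(kGraphvizMarkedNodeAttr);
}

}  // namespace ir
}  // namespace framework
}  // namespace paddle
```

Centralizing the lazy initialization in one helper keeps each fuse pass from repeating the `Has`/`Set` dance and from hard-coding the attribute's value type.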
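The deleted `fused_lookup_tables_op.cc` converter and its `LookupTablePluginDynamic` plugin are superseded by `tensorrt/convert/lookup_table_op.cc`, per the NOTE at the end of the deleted converter. That file is not shown in this patch, but the underlying operation is just a gather along axis 0 of the weight matrix, which TensorRT can express natively. A hypothetical sketch of the native-layer equivalent (names are illustrative, not the actual converter code):

```c++
#include "NvInfer.h"

namespace {

// output[i, :] = weight[ids[i], :]. An embedding lookup is a gather on
// axis 0 of the weight matrix, so no custom plugin is required.
nvinfer1::ITensor* AddLookupTable(nvinfer1::INetworkDefinition* network,
                                  nvinfer1::ITensor* weight,
                                  nvinfer1::ITensor* ids) {
  nvinfer1::IGatherLayer* gather =
      network->addGather(*weight, *ids, /*axis=*/0);
  return gather->getOutput(0);
}

}  // namespace
```

A native layer also serializes with the engine, which removes the manual `serialize`/`deserializePlugin` logic and the `cudaMalloc`/`cudaMemcpy` weight juggling visible in the deleted plugin.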
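Likewise, `prelu_op_plugin.cu/.h` can be deleted because `prelu_op.cc` no longer references the plugin; presumably the converter now relies on TensorRT's built-in `IParametricReLULayer` (available since TensorRT 6), which computes the same `y = x > 0 ? x : slope * x`. A hypothetical sketch of that mapping, not the converter's actual code:

```c++
#include "NvInfer.h"

namespace {

// y = x > 0 ? x : slope * x, with `slope` broadcastable to the shape of `x`.
nvinfer1::ITensor* AddPRelu(nvinfer1::INetworkDefinition* network,
                            nvinfer1::ITensor* input,
                            nvinfer1::ITensor* slope) {
  nvinfer1::IParametricReLULayer* layer =
      network->addParametricReLU(*input, *slope);
  return layer->getOutput(0);
}

}  // namespace
```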