提交 94a57f1d 编写于 作者: N nhzlx

add trt config to arguments

上级 68fb818a
...@@ -97,8 +97,9 @@ void DataFlowGraphToFluidPass::AddFluidOp(Node *node) { ...@@ -97,8 +97,9 @@ void DataFlowGraphToFluidPass::AddFluidOp(Node *node) {
} }
} }
void CreateTrtEngineOp(Node *node, const DataFlowGraph &graph, void CreateTrtEngineOp(Node *node, Argument *argument,
framework::proto::BlockDesc *block) { framework::proto::BlockDesc *block) {
const DataFlowGraph &graph = *(argument->main_dfg);
static int counter{0}; static int counter{0};
PADDLE_ENFORCE(node->IsFunctionBlock()); PADDLE_ENFORCE(node->IsFunctionBlock());
framework::OpDesc desc; framework::OpDesc desc;
...@@ -204,7 +205,10 @@ void CreateTrtEngineOp(Node *node, const DataFlowGraph &graph, ...@@ -204,7 +205,10 @@ void CreateTrtEngineOp(Node *node, const DataFlowGraph &graph,
PADDLE_ENFORCE(!block->vars().empty(), "the block has no var-desc"); PADDLE_ENFORCE(!block->vars().empty(), "the block has no var-desc");
// Set attrs // Set attrs
SetAttr(desc.Proto(), "subgraph", block->SerializeAsString()); SetAttr(desc.Proto(), "subgraph", block->SerializeAsString());
SetAttr(desc.Proto(), "max_batch_size", argument->Get<int>("max_batch_size"));
SetAttr(desc.Proto(), "workspace_size", argument->Get<int>("workspace_size"));
SetAttr(desc.Proto(), "engine_uniq_key", "trt-" + std::to_string(counter++)); SetAttr(desc.Proto(), "engine_uniq_key", "trt-" + std::to_string(counter++));
SetAttr(desc.Proto(), "parameters", ExtractParameters(graph.nodes.nodes())); SetAttr(desc.Proto(), "parameters", ExtractParameters(graph.nodes.nodes()));
SetAttr(desc.Proto(), "output_name_mapping", output_mapping); SetAttr(desc.Proto(), "output_name_mapping", output_mapping);
...@@ -248,7 +252,7 @@ void DataFlowGraphToFluidPass::AddEngineOp(Node *node) { ...@@ -248,7 +252,7 @@ void DataFlowGraphToFluidPass::AddEngineOp(Node *node) {
*block_desc.Proto()->mutable_vars() = *block_desc.Proto()->mutable_vars() =
argument_->origin_program_desc->blocks(0).vars(); argument_->origin_program_desc->blocks(0).vars();
PADDLE_ENFORCE(!block_desc.Proto()->vars().empty()); PADDLE_ENFORCE(!block_desc.Proto()->vars().empty());
CreateTrtEngineOp(node, *argument_->main_dfg, block_desc.Proto()); CreateTrtEngineOp(node, argument_, block_desc.Proto());
auto *main_block = desc_->mutable_blocks(framework::kRootBlockIndex); auto *main_block = desc_->mutable_blocks(framework::kRootBlockIndex);
auto *op = main_block->add_ops(); auto *op = main_block->add_ops();
PADDLE_ENFORCE(!node->pb_msg().empty(), "failed to set desc for block"); PADDLE_ENFORCE(!node->pb_msg().empty(), "failed to set desc for block");
......
...@@ -309,7 +309,8 @@ void SubGraphFuse::operator()() { ReplaceNodesWithSubGraphs(); } ...@@ -309,7 +309,8 @@ void SubGraphFuse::operator()() { ReplaceNodesWithSubGraphs(); }
void SubGraphFuse::ReplaceNodesWithSubGraphs() { void SubGraphFuse::ReplaceNodesWithSubGraphs() {
auto subgraphs = SubGraphSplitter(graph_, node_inside_subgraph_teller_)(); auto subgraphs = SubGraphSplitter(graph_, node_inside_subgraph_teller_)();
for (auto &subgraph : subgraphs) { for (auto &subgraph : subgraphs) {
if (subgraph.size() <= 3) continue; if (subgraph.size() <= argument_->Get<int>("minimun_subgraph_size"))
continue;
std::unordered_set<Node *> subgraph_uniq(subgraph.begin(), subgraph.end()); std::unordered_set<Node *> subgraph_uniq(subgraph.begin(), subgraph.end());
// replace this sub-graph with the first node. Two steps: 1. Create a Block // replace this sub-graph with the first node. Two steps: 1. Create a Block
// Node that contains this subgraph 2. Mark the nodes inside the sub-graph // Node that contains this subgraph 2. Mark the nodes inside the sub-graph
......
...@@ -20,6 +20,7 @@ limitations under the License. */ ...@@ -20,6 +20,7 @@ limitations under the License. */
#include <vector> #include <vector>
#include "paddle/fluid/inference/analysis/argument.h"
#include "paddle/fluid/inference/analysis/data_flow_graph.h" #include "paddle/fluid/inference/analysis/data_flow_graph.h"
#include "paddle/fluid/inference/analysis/node.h" #include "paddle/fluid/inference/analysis/node.h"
...@@ -63,8 +64,11 @@ class SubGraphFuse { ...@@ -63,8 +64,11 @@ class SubGraphFuse {
public: public:
using NodeInsideSubgraphTeller = SubGraphSplitter::NodeInsideSubgraphTeller; using NodeInsideSubgraphTeller = SubGraphSplitter::NodeInsideSubgraphTeller;
SubGraphFuse(DataFlowGraph *graph, const NodeInsideSubgraphTeller &teller) SubGraphFuse(DataFlowGraph *graph, const NodeInsideSubgraphTeller &teller,
: graph_(graph), node_inside_subgraph_teller_(teller) {} Argument *argument)
: graph_(graph),
node_inside_subgraph_teller_(teller),
argument_(argument) {}
// The main method which run all the logic. // The main method which run all the logic.
void operator()(); void operator()();
...@@ -76,6 +80,7 @@ class SubGraphFuse { ...@@ -76,6 +80,7 @@ class SubGraphFuse {
private: private:
DataFlowGraph *graph_; DataFlowGraph *graph_;
NodeInsideSubgraphTeller node_inside_subgraph_teller_; NodeInsideSubgraphTeller node_inside_subgraph_teller_;
Argument *argument_;
}; };
} // namespace analysis } // namespace analysis
......
...@@ -24,7 +24,7 @@ TensorRTSubGraphPass::TensorRTSubGraphPass( ...@@ -24,7 +24,7 @@ TensorRTSubGraphPass::TensorRTSubGraphPass(
: node_inside_subgraph_teller_(teller) {} : node_inside_subgraph_teller_(teller) {}
void TensorRTSubGraphPass::Run(DataFlowGraph *graph) { void TensorRTSubGraphPass::Run(DataFlowGraph *graph) {
SubGraphFuse(graph, node_inside_subgraph_teller_)(); SubGraphFuse(graph, node_inside_subgraph_teller_, argument_)();
VLOG(4) << "debug info " VLOG(4) << "debug info "
<< graph->HumanReadableInfo(false /*show_values*/, << graph->HumanReadableInfo(false /*show_values*/,
true /*show_functions*/); true /*show_functions*/);
......
...@@ -33,7 +33,10 @@ class TensorRTSubGraphPass : public DataFlowGraphPass { ...@@ -33,7 +33,10 @@ class TensorRTSubGraphPass : public DataFlowGraphPass {
explicit TensorRTSubGraphPass(const NodeInsideSubgraphTeller& teller); explicit TensorRTSubGraphPass(const NodeInsideSubgraphTeller& teller);
bool Initialize(Argument* argument) override { return true; } bool Initialize(Argument* argument) override {
argument_ = argument;
return true;
}
// This class get a sub-graph as input and determine whether to transform this // This class get a sub-graph as input and determine whether to transform this
// sub-graph into TensorRT. // sub-graph into TensorRT.
...@@ -46,6 +49,7 @@ class TensorRTSubGraphPass : public DataFlowGraphPass { ...@@ -46,6 +49,7 @@ class TensorRTSubGraphPass : public DataFlowGraphPass {
private: private:
NodeInsideSubgraphTeller node_inside_subgraph_teller_; NodeInsideSubgraphTeller node_inside_subgraph_teller_;
Argument* argument_;
}; };
} // namespace analysis } // namespace analysis
......
...@@ -34,8 +34,6 @@ class TensorRTSubgraphPredictor : public NativePaddlePredictor { ...@@ -34,8 +34,6 @@ class TensorRTSubgraphPredictor : public NativePaddlePredictor {
bool Init(const std::shared_ptr<framework::Scope>& parent_scope) { bool Init(const std::shared_ptr<framework::Scope>& parent_scope) {
FLAGS_IA_enable_tensorrt_subgraph_engine = true; FLAGS_IA_enable_tensorrt_subgraph_engine = true;
VLOG(3) << "Predictor::init()"; VLOG(3) << "Predictor::init()";
FLAGS_tensorrt_max_batch_size = config_.max_batch_size;
FLAGS_tensorrt_workspace_size = config_.workspace_size;
if (config_.use_gpu) { if (config_.use_gpu) {
place_ = paddle::platform::CUDAPlace(config_.device); place_ = paddle::platform::CUDAPlace(config_.device);
} else { } else {
...@@ -91,6 +89,16 @@ class TensorRTSubgraphPredictor : public NativePaddlePredictor { ...@@ -91,6 +89,16 @@ class TensorRTSubgraphPredictor : public NativePaddlePredictor {
void OptimizeInferenceProgram() { void OptimizeInferenceProgram() {
// Analyze inference_program // Analyze inference_program
Argument argument; Argument argument;
int* minimum_subgraph_size = new int(config_.minimun_subgraph_size);
int* max_batch_size = new int(config_.max_batch_size);
int* workspace_size = new int(config_.workspace_size);
std::string* precision_mode = new std::string(config_.precision_mode);
argument.Set<int>("minimun_subgraph_size", minimum_subgraph_size);
argument.Set<int>("max_batch_size", max_batch_size);
argument.Set<int>("workspace_size", workspace_size);
argument.Set<std::string>("precision_mode", precision_mode);
if (!config_.model_dir.empty()) { if (!config_.model_dir.empty()) {
argument.fluid_model_dir.reset(new std::string(config_.model_dir)); argument.fluid_model_dir.reset(new std::string(config_.model_dir));
} else { } else {
......
...@@ -150,6 +150,15 @@ struct TensorRTConfig : public NativeConfig { ...@@ -150,6 +150,15 @@ struct TensorRTConfig : public NativeConfig {
// For workspace_size, refer it from here: // For workspace_size, refer it from here:
// https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#troubleshooting // https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#troubleshooting
int workspace_size{1 << 30}; int workspace_size{1 << 30};
// We transform the Ops that can be converted into TRT layer in the model,
// and aggregate these Ops into subgraphs for TRT execution.
// We set this variable to control the minimum number of nodes in the
// subgraph, 3 as
// default value.
int minimun_subgraph_size = 3;
// Reserved configuration
// We just support "FP32" now, "FP16" and "INT8" will be supported.
std::string precision_mode = "FP32";
}; };
// NOTE WIP, not stable yet. // NOTE WIP, not stable yet.
......
...@@ -99,7 +99,7 @@ TEST(trt_models_test, main) { ...@@ -99,7 +99,7 @@ TEST(trt_models_test, main) {
std::vector<std::string> infer_models = {"mobilenet", "resnet50", std::vector<std::string> infer_models = {"mobilenet", "resnet50",
"resnext50"}; "resnext50"};
for (auto &model_dir : infer_models) { for (auto &model_dir : infer_models) {
CompareTensorRTWithFluid(1, FLAGS_dirname + "/" + model_dir); CompareTensorRTWithFluid(5, FLAGS_dirname + "/" + model_dir);
} }
} }
} // namespace paddle } // namespace paddle
...@@ -22,8 +22,6 @@ ...@@ -22,8 +22,6 @@
namespace paddle { namespace paddle {
DEFINE_int32(tensorrt_engine_batch_size, 1, "the batch_size of TensorRT"); DEFINE_int32(tensorrt_engine_batch_size, 1, "the batch_size of TensorRT");
DEFINE_int32(tensorrt_max_batch_size, 1, "TensorRT maximum batch size");
DEFINE_int32(tensorrt_workspace_size, 16 << 20, "TensorRT workspace size");
namespace operators { namespace operators {
...@@ -34,6 +32,8 @@ class TensorRTEngineOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -34,6 +32,8 @@ class TensorRTEngineOpMaker : public framework::OpProtoAndCheckerMaker {
AddOutput("Ys", "A list of outputs").AsDuplicable(); AddOutput("Ys", "A list of outputs").AsDuplicable();
AddAttr<std::string>("subgraph", "the subgraph."); AddAttr<std::string>("subgraph", "the subgraph.");
AddAttr<std::string>("engine_uniq_key", "unique key for the TRT engine."); AddAttr<std::string>("engine_uniq_key", "unique key for the TRT engine.");
AddAttr<int>("max_batch_size", "the maximum batch size.");
AddAttr<int>("workspace_size", "the maximum batch size.");
AddComment("TensorRT engine operator."); AddComment("TensorRT engine operator.");
} }
}; };
......
...@@ -28,8 +28,6 @@ ...@@ -28,8 +28,6 @@
namespace paddle { namespace paddle {
DECLARE_int32(tensorrt_engine_batch_size); DECLARE_int32(tensorrt_engine_batch_size);
DECLARE_int32(tensorrt_max_batch_size);
DECLARE_int32(tensorrt_workspace_size);
namespace operators { namespace operators {
...@@ -92,14 +90,14 @@ class TensorRTEngineKernel : public framework::OpKernel<T> { ...@@ -92,14 +90,14 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
auto engine_name = context.Attr<std::string>("engine_uniq_key"); auto engine_name = context.Attr<std::string>("engine_uniq_key");
int max_batch_size = context.Attr<int>("max_batch_size");
if (!Singleton<TRT_EngineManager>::Global().HasEngine(engine_name)) { if (!Singleton<TRT_EngineManager>::Global().HasEngine(engine_name)) {
Prepare(context); Prepare(context);
} }
auto* engine = Singleton<TRT_EngineManager>::Global().Get(engine_name); auto* engine = Singleton<TRT_EngineManager>::Global().Get(engine_name);
auto input_names = context.op().Inputs("Xs"); auto input_names = context.op().Inputs("Xs");
PADDLE_ENFORCE(!input_names.empty(), "should pass more than one inputs"); PADDLE_ENFORCE(!input_names.empty(), "should pass more than one inputs");
PADDLE_ENFORCE_LE(FLAGS_tensorrt_engine_batch_size, PADDLE_ENFORCE_LE(FLAGS_tensorrt_engine_batch_size, max_batch_size);
FLAGS_tensorrt_max_batch_size);
std::vector<std::string> output_maps = std::vector<std::string> output_maps =
context.Attr<std::vector<std::string>>("output_name_mapping"); context.Attr<std::vector<std::string>>("output_name_mapping");
...@@ -173,8 +171,9 @@ class TensorRTEngineKernel : public framework::OpKernel<T> { ...@@ -173,8 +171,9 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
// Get the ProgramDesc and pass to convert. // Get the ProgramDesc and pass to convert.
framework::proto::BlockDesc block_desc; framework::proto::BlockDesc block_desc;
block_desc.ParseFromString(context.Attr<std::string>("subgraph")); block_desc.ParseFromString(context.Attr<std::string>("subgraph"));
int max_batch = FLAGS_tensorrt_max_batch_size; int max_batch_size = context.Attr<int>("max_batch_size");
auto max_workspace = FLAGS_tensorrt_workspace_size; int workspace_size = context.Attr<int>("workspace_size");
auto params = context.Attr<std::vector<std::string>>("parameters"); auto params = context.Attr<std::vector<std::string>>("parameters");
std::unordered_set<std::string> parameters; std::unordered_set<std::string> parameters;
for (const auto& param : params) { for (const auto& param : params) {
...@@ -186,7 +185,7 @@ class TensorRTEngineKernel : public framework::OpKernel<T> { ...@@ -186,7 +185,7 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
// TODO(Superjomn) replace this with a different stream // TODO(Superjomn) replace this with a different stream
auto* engine = Singleton<TRT_EngineManager>::Global().Create( auto* engine = Singleton<TRT_EngineManager>::Global().Create(
max_batch, max_workspace, nullptr /*engine hold its own stream*/, max_batch_size, workspace_size, nullptr /*engine hold its own stream*/,
context.Attr<std::string>("engine_uniq_key"), context.Attr<std::string>("engine_uniq_key"),
boost::get<platform::CUDAPlace>(context.GetPlace()).device); boost::get<platform::CUDAPlace>(context.GetPlace()).device);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册