Unverified commit ddcd1b61, authored by Yuanle Liu, committed by GitHub

[cherry-pick][Inference] support mixed precision inference (#49077)

* [Release2.4] Revert python link prs (#48573)

* Revert "Fix mac link python (#48017)"

This reverts commit 3fa7a736.

* Revert "[Cherry-pick] Fix python link error (#47811)"

This reverts commit ff642c68.

* Update config.go

* [Paddle Inference] Add float_to_half_pass to support inference with mixed precision (#47993)

* [Inference] optimize some code and fix some bug (#48780)

* clean ir_pass_manager and fix map_depthwise_conv_to_conv_pass

* fix unittest timeout

* [Paddle Inference] clean unused code (#48392)

* fix

* update

* update
Co-authored-by: Chen Weihang <chenweihang@baidu.com>
Parent 9e2ba9b9
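For context, a minimal sketch of how the feature added by this PR is meant to be driven from user code, based on the EnableUseGpu signature change further down (the model paths are placeholders, not from this PR):

#include "paddle/fluid/inference/api/paddle_inference_api.h"

int main() {
  paddle::AnalysisConfig config;
  config.SetModel("model.pdmodel", "model.pdiparams");  // hypothetical paths
  // New in this PR: an optional precision argument; kHalf or kBf16 turns on
  // the GPU mixed precision path (auto_mixed_precision_pass).
  config.EnableUseGpu(512 /* memory pool MB */, 0 /* device id */,
                      paddle_infer::PrecisionType::kHalf);
  auto predictor = paddle::CreatePaddlePredictor(config);
  // ... feed inputs and call predictor->Run(...) as usual ...
  return 0;
}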
@@ -148,6 +148,7 @@ pass_library(delete_c_identity_op_pass inference)
 pass_library(preln_residual_bias_fuse_pass inference)
 pass_library(delete_fill_constant_op_pass inference)
 pass_library(constant_folding_pass inference)
+pass_library(auto_mixed_precision_pass inference)
 pass_library(simplify_with_basic_ops_pass base)
 pass_library(fc_elementwise_layernorm_fuse_pass base)
 pass_library(skip_layernorm_fuse_pass base)
......
This diff is collapsed.
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <unordered_map>
#include <unordered_set>
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/node.h"
#include "paddle/phi/common/backend.h"
#include "paddle/phi/common/data_type.h"
namespace paddle {
namespace framework {
namespace ir {
class AutoMixedPrecisionPass : public FusePassBase {
public:
using VarType = framework::proto::VarType;
public:
AutoMixedPrecisionPass() = default;
~AutoMixedPrecisionPass() = default;
protected:
void ApplyImpl(Graph* graph) const override;
private:
void Init(Graph* graph) const;
void SetDefaultBlacklist() const;
void SetOpUniqueType() const;
void RestoreOpOriginType() const;
inline std::string GetOpOriginalType(const std::string& op_type) const;
void GetOpPrecision() const;
void UpdateOpPrecision() const;
void InsertCastOp() const;
void ProcessOpWithDtypeAttr() const;
bool InputVarsNotConvert(Node* op_node, const std::string& var_name) const;
bool OutputVarsNotConvert(Node* op_node, const std::string& var_name) const;
void SetVarPrecision() const;
void ConvertWeightsData() const;
private:
mutable bool skip_pass_{false};
mutable bool keep_io_types_{false};
// float16 or bfloat16 now
mutable phi::DataType low_precision_{phi::DataType::FLOAT16};
mutable phi::Backend backend_{phi::Backend::GPU};
mutable std::unordered_set<std::string> black_list_;
// subgraph id -> pointer to subgraph
mutable std::vector<Graph*> subgraphes_;
// var name -> real var node
mutable std::unordered_map<std::string, Node*> real_vars_;
// subgraph id -> all op nodes in subgraph
mutable std::vector<std::vector<Node*>> all_op_nodes_;
// op's unique type -> the op's origin type
mutable std::unordered_map<std::string, std::string> op_original_type_;
// op's unique type -> whether the op run at low precision
mutable std::unordered_set<std::string> op_run_low_precision_;
mutable std::unordered_set<std::string> vars_convert_to_low_precision_;
};
bool OpSupportPrecision(const std::string& op_type,
phi::Backend backend,
phi::DataType precision,
const std::unordered_set<std::string>& black_list);
void DoInsertCastOp(Graph* graph,
Node* var_node,
Node* op_node,
proto::VarType::Type from_type,
proto::VarType::Type to_type,
framework::BlockDesc* block_desc,
int* suffix,
std::unordered_map<Node*, Node*>* cache);
} // namespace ir
} // namespace framework
} // namespace paddle
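The header above only lists the steps; the order in which ApplyImpl likely composes them, inferred from the method names (a hypothetical sketch, not the actual implementation from this PR):

// Hypothetical sketch of the pass flow implied by the declarations above.
void AutoMixedPrecisionPass::ApplyImpl(Graph* graph) const {
  Init(graph);               // collect subgraphs, real var nodes, op nodes
  if (skip_pass_) return;    // e.g. unsupported backend/precision combination
  SetDefaultBlacklist();     // ops that stay in FP32 regardless
  SetOpUniqueType();         // give every op node a unique type key
  GetOpPrecision();          // per op: can it run at low_precision_?
  UpdateOpPrecision();       // propagate so producers/consumers stay consistent
  InsertCastOp();            // add casts at FP32 <-> low-precision boundaries
  ProcessOpWithDtypeAttr();  // rewrite dtype-style attributes where present
  SetVarPrecision();         // retag var nodes chosen for low precision
  ConvertWeightsData();      // convert FP32 weights held in the scope
  RestoreOpOriginType();     // undo the unique-type renaming
}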
@@ -29,6 +29,11 @@ void FillConstData(LoDTensor* out_t, T value) {
 }
 void DeleteFillConstantOpPass::ApplyImpl(ir::Graph* graph) const {
+  bool with_dynamic_shape = Get<bool>("with_dynamic_shape");
+  // Not supported under dynamic shape
+  if (with_dynamic_shape) {
+    return;
+  }
   FusePassBase::Init("delete_fill_constant_op_pass", graph);
   GraphPatternDetector detector;
   auto fill_constant_op =
......
@@ -75,7 +75,6 @@ Graph::Graph(const ProgramDesc &program,
     }
   } else {
     auto var_nodes = InitFromProgram(program_, start_op_index, end_op_index);
-    ResolveHazard(var_nodes);
   }
 }
@@ -88,7 +87,6 @@ Graph::Graph(const BlockDesc &block,
             const int64_t end_op_index)
     : main_graph_(main_graph) {
   auto var_nodes = InitFromBlock(block, start_op_index, end_op_index);
-  ResolveHazard(var_nodes);
 }
 // TODO(levi): delete this interface after when we can convert all
......
@@ -130,86 +130,6 @@ TEST(GraphTest, Basic) {
   ASSERT_EQ(nodes.size(), 5UL);
 }
-TEST(GraphTest, WriteAfterRead) {
-  // void Test() {
-  ProgramDesc prog;
-  auto *op = prog.MutableBlock(0)->AppendOp();
-  op->SetType("sum");
-  op->SetInput("X", {"a"});
-  op->SetOutput("Out", {"b"});
-  op->SetAttr("op_role", 1);
-  op = prog.MutableBlock(0)->AppendOp();
-  op->SetType("dummy");
-  op->SetInput("X", {"c"});
-  op->SetOutput("Out", {"a"});
-  op->SetAttr("op_role", 1);
-  prog.MutableBlock(0)->Var("a")->SetType(proto::VarType::LOD_TENSOR);
-  prog.MutableBlock(0)->Var("b")->SetType(proto::VarType::LOD_TENSOR);
-  prog.MutableBlock(0)->Var("c")->SetType(proto::VarType::LOD_TENSOR);
-  std::unique_ptr<ir::Graph> g(new ir::Graph(prog));
-  ir::Node *control_dep1 = nullptr;
-  ir::Node *control_dep2 = nullptr;
-  for (ir::Node *n : g->Nodes()) {
-    if (n->Name() == "sum") {
-      ASSERT_EQ(n->outputs[0]->Name(), "b");
-      ASSERT_TRUE(ir::IsControlDepVar(*n->outputs[1]));
-      control_dep1 = n->outputs[1];
-      ASSERT_EQ(n->outputs.size(), 2UL);
-    }
-    if (n->Name() == "dummy") {
-      ASSERT_EQ(n->inputs[0]->Name(), "c");
-      ASSERT_TRUE(ir::IsControlDepVar(*n->inputs[1]));
-      control_dep2 = n->inputs[1];
-      ASSERT_EQ(n->inputs.size(), 2UL);
-    }
-  }
-  ASSERT_EQ(control_dep1, control_dep2);
-}
-TEST(GraphTest, WriteAfterWrite) {
-  // void Test() {
-  ProgramDesc prog;
-  auto *op = prog.MutableBlock(0)->AppendOp();
-  op->SetType("sum");
-  op->SetInput("X", {"a"});
-  op->SetOutput("Out", {"b"});
-  op->SetAttr("op_role", 1);
-  op = prog.MutableBlock(0)->AppendOp();
-  op->SetType("dummy");
-  op->SetInput("X", {"c"});
-  op->SetOutput("Out", {"b"});
-  op->SetAttr("op_role", 1);
-  prog.MutableBlock(0)->Var("a")->SetType(proto::VarType::LOD_TENSOR);
-  prog.MutableBlock(0)->Var("b")->SetType(proto::VarType::LOD_TENSOR);
-  prog.MutableBlock(0)->Var("c")->SetType(proto::VarType::LOD_TENSOR);
-  std::unique_ptr<ir::Graph> g(new ir::Graph(prog));
-  ir::Node *control_dep1 = nullptr;
-  ir::Node *control_dep2 = nullptr;
-  for (ir::Node *n : g->Nodes()) {
-    if (n->Name() == "sum") {
-      ASSERT_EQ(n->outputs[0]->Name(), "b");
-      ASSERT_TRUE(ir::IsControlDepVar(*n->outputs[1]));
-      ASSERT_EQ(n->outputs.size(), 2UL);
-      control_dep1 = n->outputs[1];
-    }
-    if (n->Name() == "dummy") {
-      ASSERT_EQ(n->inputs[0]->Name(), "c");
-      ASSERT_TRUE(ir::IsControlDepVar(*n->inputs[1]));
-      control_dep2 = n->inputs[1];
-      ASSERT_EQ(n->inputs.size(), 2UL);
-    }
-  }
-  ASSERT_NE(control_dep1, nullptr);
-  ASSERT_NE(control_dep2, nullptr);
-  ASSERT_EQ(control_dep1, control_dep2);
-}
 TEST(GraphTest, TestException) {
   ProgramDesc prog;
   std::unique_ptr<ir::Graph> g(new ir::Graph(prog));
@@ -350,12 +270,13 @@ TEST(GraphTest, TestMultiBlock) {
   op = prog.MutableBlock(1)->AppendOp();
   op->SetType("dummy");
   op->SetInput("X", {"c"});
-  op->SetOutput("Out", {"a"});
+  op->SetOutput("Out", {"d"});
   op->SetAttr("op_role", 1);
   prog.MutableBlock(1)->Var("a")->SetType(proto::VarType::LOD_TENSOR);
   prog.MutableBlock(1)->Var("b")->SetType(proto::VarType::LOD_TENSOR);
   prog.MutableBlock(1)->Var("c")->SetType(proto::VarType::LOD_TENSOR);
+  prog.MutableBlock(1)->Var("d")->SetType(proto::VarType::LOD_TENSOR);
   // Set contents in block_2.
   op = prog.MutableBlock(2)->AppendOp();
@@ -367,12 +288,13 @@ TEST(GraphTest, TestMultiBlock) {
   op = prog.MutableBlock(2)->AppendOp();
   op->SetType("dummy");
   op->SetInput("X", {"c"});
-  op->SetOutput("Out", {"b"});
+  op->SetOutput("Out", {"d"});
   op->SetAttr("op_role", 1);
   prog.MutableBlock(2)->Var("a")->SetType(proto::VarType::LOD_TENSOR);
   prog.MutableBlock(2)->Var("b")->SetType(proto::VarType::LOD_TENSOR);
   prog.MutableBlock(2)->Var("c")->SetType(proto::VarType::LOD_TENSOR);
+  prog.MutableBlock(1)->Var("d")->SetType(proto::VarType::LOD_TENSOR);
   // Step2: Convert program into graph, 3 blocks corresponding 3 sub_graphs.
   std::unique_ptr<ir::Graph> g(new ir::Graph(prog));
@@ -399,45 +321,29 @@ TEST(GraphTest, TestMultiBlock) {
   // Check contents in sub_graph_1.
   const ir::Graph *g1 = g->GetSubGraph(1);
-  ir::Node *control_dep1 = nullptr;
-  ir::Node *control_dep2 = nullptr;
   for (ir::Node *n : g1->Nodes()) {
     if (n->Name() == "sum") {
       ASSERT_EQ(n->outputs[0]->Name(), "b");
-      ASSERT_TRUE(ir::IsControlDepVar(*n->outputs[1]));
-      control_dep1 = n->outputs[1];
-      ASSERT_EQ(n->outputs.size(), 2UL);
+      ASSERT_EQ(n->outputs.size(), 1UL);
     }
     if (n->Name() == "dummy") {
       ASSERT_EQ(n->inputs[0]->Name(), "c");
-      ASSERT_TRUE(ir::IsControlDepVar(*n->inputs[1]));
-      control_dep2 = n->inputs[1];
-      ASSERT_EQ(n->inputs.size(), 2UL);
+      ASSERT_EQ(n->inputs.size(), 1UL);
     }
   }
-  ASSERT_EQ(control_dep1, control_dep2);
   // Check contents in sub_graph_2.
   const ir::Graph *g2 = g->GetSubGraph(2);
-  control_dep1 = nullptr;
-  control_dep2 = nullptr;
   for (ir::Node *n : g2->Nodes()) {
     if (n->Name() == "sum") {
       ASSERT_EQ(n->outputs[0]->Name(), "b");
-      ASSERT_TRUE(ir::IsControlDepVar(*n->outputs[1]));
-      ASSERT_EQ(n->outputs.size(), 2UL);
-      control_dep1 = n->outputs[1];
+      ASSERT_EQ(n->outputs.size(), 1UL);
     }
     if (n->Name() == "dummy") {
       ASSERT_EQ(n->inputs[0]->Name(), "c");
-      ASSERT_TRUE(ir::IsControlDepVar(*n->inputs[1]));
-      control_dep2 = n->inputs[1];
-      ASSERT_EQ(n->inputs.size(), 2UL);
+      ASSERT_EQ(n->inputs.size(), 1UL);
     }
   }
-  ASSERT_NE(control_dep1, nullptr);
-  ASSERT_NE(control_dep2, nullptr);
-  ASSERT_EQ(control_dep1, control_dep2);
   // Step3: Clone graph.
   std::shared_ptr<ir::Graph> clone_g = g->Clone();
......
@@ -331,8 +331,6 @@ void BatchMergePass::ApplyImpl(ir::Graph* graph) const {
       copy_node(node);
     }
   }
-  result.ResolveHazard(created);
 }
 }  // namespace ir
......
@@ -183,5 +183,6 @@ void NaiveExecutor::ResetTrtOps(int num) {
 }
 #endif
 }
 }  // namespace framework
 }  // namespace paddle
@@ -38,8 +38,7 @@ void Analyzer::RunAnalysis(Argument *argument) {
     if (!disable_logs) {
       string::PrettyLogH1("--- Running analysis [%s]", pass);
     }
-    if (!argument->enable_analysis_optim() && pass == "ir_analysis_pass")
-      continue;
+    if (!argument->enable_ir_optim() && pass == "ir_analysis_pass") continue;
     auto *ptr = PassRegistry::Global().Retreive(pass);
     PADDLE_ENFORCE_NOT_NULL(ptr,
......
@@ -31,7 +31,7 @@ TEST(Analyzer, analysis_without_tensorrt) {
   Argument argument;
   argument.SetDisableLogs(false);
   argument.SetModelDir(FLAGS_inference_model_dir);
-  argument.SetEnableAnalysisOptim(false);
+  argument.SetEnableIrOptim(false);
   argument.SetUseGPU(false);
   argument.SetAnalysisPasses({"ir_graph_build_pass",
                               "ir_analysis_pass",
@@ -44,7 +44,7 @@ TEST(Analyzer, analysis_without_tensorrt) {
 TEST(Analyzer, analysis_with_tensorrt) {
   Argument argument;
   argument.SetDisableLogs(false);
-  argument.SetEnableAnalysisOptim(false);
+  argument.SetEnableIrOptim(false);
   argument.SetTensorRtMaxBatchSize(3);
   argument.SetTensorRtWorkspaceSize(1 << 20);
   argument.SetModelDir(FLAGS_inference_model_dir);
......
@@ -42,8 +42,6 @@ namespace paddle {
 namespace inference {
 namespace analysis {
-using framework::ir::Graph;
 #ifdef PADDLE_WITH_MKLDNN
 using VarQuantScale =
     std::unordered_map<std::string, std::pair<bool, framework::LoDTensor>>;
@@ -148,7 +146,7 @@ struct Argument {
   DECL_ARGUMENT_FIELD(model_params_path, ModelParamsPath, std::string);
   DECL_ARGUMENT_FIELD(model_from_memory, ModelFromMemory, bool);
   DECL_ARGUMENT_FIELD(optim_cache_dir, OptimCacheDir, std::string);
-  DECL_ARGUMENT_FIELD(enable_analysis_optim, EnableAnalysisOptim, bool);
+  DECL_ARGUMENT_FIELD(enable_ir_optim, EnableIrOptim, bool);
   // For JITLayer
   DECL_ARGUMENT_FIELD(skip_load_params, SkipLoadParams, bool);
@@ -362,6 +360,8 @@ struct Argument {
   DECL_ARGUMENT_FIELD(mixed_black_list,
                       MixedBlackList,
                       std::unordered_set<std::string>);
+  DECL_ARGUMENT_FIELD(enable_gpu_mixed, EnableGPUMixed, bool);
+  DECL_ARGUMENT_FIELD(mixed_precision_mode, MixedPrecisionMode, int);
 private:
   std::unordered_set<std::string> valid_fields_;
......
@@ -153,25 +153,6 @@ T &GetFromScope(const framework::Scope &scope, const std::string &name) {
   return *var->GetMutable<T>();
 }
-static framework::proto::ProgramDesc LoadProgramDesc(
-    const std::string &model_path) {
-  std::ifstream fin(model_path, std::ios::in | std::ios::binary);
-  PADDLE_ENFORCE_EQ(
-      fin.is_open(),
-      true,
-      platform::errors::NotFound(
-          "Cannot open file %s, please confirm whether the file exists",
-          model_path));
-  fin.seekg(0, std::ios::end);
-  std::string buffer(fin.tellg(), ' ');
-  fin.seekg(0, std::ios::beg);
-  fin.read(&buffer[0], buffer.size());
-  fin.close();
-  framework::proto::ProgramDesc program_desc;
-  program_desc.ParseFromString(buffer);
-  return program_desc;
-}
 static bool FileExists(const std::string &filepath) {
   std::ifstream file(filepath);
   bool exists = file.is_open();
......
@@ -27,6 +27,7 @@
 #include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/inference/analysis/argument.h"
 #include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/core/errors.h"
 namespace paddle {
 namespace inference {
@@ -36,15 +37,6 @@ using string::PrettyLogEndl;
 using string::Style;
 IRPassManager::IRPassManager(Argument *argument) {
-  ARGUMENT_CHECK_FIELD(argument, main_program);
-  graph_ = std::unique_ptr<Graph>(new Graph(argument->main_program()));
-  if (argument->Has("scope")) {
-    auto *scope_ptr = argument->scope_ptr();
-    PADDLE_ENFORCE_NOT_NULL(scope_ptr,
-                            platform::errors::PreconditionNotMet(
-                                "The scope ptr should not be nullptr."));
-    graph_->SetNotOwned(framework::ir::kParamScopeAttr, scope_ptr);
-  }
   disable_logs_ = argument->disable_logs();
   ARGUMENT_CHECK_FIELD(argument, ir_analysis_passes);
@@ -95,10 +87,14 @@ void IRPassManager::CreatePasses(Argument *argument,
         argument->tensorrt_tuned_dynamic_shape();
     pass->Set("with_dynamic_shape", new bool(with_dynamic_shape));
+    // mixed precision related
     pass->Set("model_precision", new int(argument->model_precision()));
     pass->Set(
         "mixed_black_list",
         new std::unordered_set<std::string>(argument->mixed_black_list()));
+    pass->Set("enable_gpu_mixed", new bool(argument->enable_gpu_mixed()));
+    pass->Set("mixed_precision_mode",
+              new int(argument->mixed_precision_mode()));
     if (pass_name == "graph_viz_pass") {
       std::string optim_cache_dir = argument->optim_cache_dir();
@@ -302,42 +298,18 @@ void IRPassManager::CreatePasses(Argument *argument,
 }
 std::unique_ptr<Graph> IRPassManager::Apply(std::unique_ptr<Graph> graph) {
-  if (passes_.empty()) {
-    return graph;
-  }
   PADDLE_ENFORCE_NOT_NULL(
-      graph.get(),
-      platform::errors::PreconditionNotMet("Graph cannot be NULL."));
+      graph.get(), platform::errors::InvalidArgument("Graph cannot be null."));
   // Apply all the passes
   for (const auto &pass : passes_) {
     if (pass->Type() != "graph_viz_pass" && !disable_logs_) {
       PrettyLogEndl(Style::H2(), "--- Running IR pass [%s]", pass->Type());
     }
-    // delete_fill_constant_op_pass is not apply under trt dynamic shape
-    if (pass->Type() == "delete_fill_constant_op_pass") {
-      bool use_dynamic = pass->Get<bool>("with_dynamic_shape");
-      if (use_dynamic) continue;
-    }
     graph.reset(pass->Apply(graph.release()));
   }
   return graph;
 }
-framework::proto::ProgramDesc IRPassManager::AcquireProgram(
-    std::unique_ptr<Graph> *graph, ProgramDesc *program) const {
-  auto pass =
-      framework::ir::PassRegistry::Instance().Get("graph_to_program_pass");
-  // Direct using ProgramDesc desc(argument->main_program()) may cause
-  // incomplete copies of information.
-  ProgramDesc desc;
-  desc.CopyFrom(*program->Proto());
-  pass->SetNotOwned("program", &desc);
-  auto *the_graph = graph->release();
-  graph->reset(pass->Apply(the_graph));
-  return *desc.Proto();
-}
 }  // namespace analysis
 }  // namespace inference
 }  // namespace paddle
@@ -48,15 +48,9 @@ class IRPassManager final {
   std::unique_ptr<Graph> Apply(std::unique_ptr<Graph> graph);
-  framework::proto::ProgramDesc AcquireProgram(std::unique_ptr<Graph> *graph,
-                                               ProgramDesc *program) const;
-  framework::ir::Graph &graph() const { return *graph_; }
 private:
   void CreatePasses(Argument *argument, const std::vector<std::string> &passes);
-  std::unique_ptr<Graph> graph_;
   std::vector<std::unique_ptr<Pass>> passes_;
   bool disable_logs_{false};
 };
......
@@ -94,14 +94,14 @@ void OutputProcess(framework::ir::Graph *graph,
                             backend,
                             precision,
                             blacklist)) {
-        AddCastOp(graph,
-                  var_node,
-                  next_op,
-                  framework::proto::VarType::FP32,
-                  to_type,
-                  &suffix,
-                  block_desc,
-                  &var_to_cast_op_map);
+        InsertCastOp(graph,
+                     var_node,
+                     next_op,
+                     framework::proto::VarType::FP32,
+                     to_type,
+                     block_desc,
+                     &suffix,
+                     &var_to_cast_op_map);
         var_node->Var()->SetDataType(framework::proto::VarType::FP32);
       }
     }
......
@@ -13,7 +13,7 @@ cc_library(
 cc_library(
   convert_to_mixed_precision
   SRCS convert_to_mixed_precision.cc
-  DEPS analysis_pass ir_graph_build_pass)
+  DEPS analysis_pass ir_graph_build_pass auto_mixed_precision_pass)
 cc_library(
   ir_params_sync_among_devices_pass
   SRCS ir_params_sync_among_devices_pass.cc
@@ -30,17 +30,6 @@ cc_library(
   inference_op_replace_pass
   SRCS inference_op_replace_pass.cc
   DEPS analysis_pass graph_to_program_pass)
-if(WITH_TESTING)
-  cc_library(
-    ir_graph_clean_pass
-    SRCS ir_graph_clean_pass.cc
-    DEPS analysis_pass gtest)
-else()
-  cc_library(
-    ir_graph_clean_pass
-    SRCS ir_graph_clean_pass.cc
-    DEPS analysis_pass)
-endif()
 cc_library(
   analysis_passes
@@ -52,8 +41,7 @@ cc_library(
   memory_optim_pass
   convert_to_mixed_precision
   inference_op_replace_pass
-  ir_graph_to_program_pass
-  ir_graph_clean_pass)
+  ir_graph_to_program_pass)
 set(analysis_deps
     ${analysis_deps} analysis_passes subgraph_detector
......
@@ -15,14 +15,12 @@
 #pragma once
 #include <string>
-#include <unordered_map>
 #include <unordered_set>
 #include "paddle/fluid/framework/block_desc.h"
 #include "paddle/fluid/framework/ir/graph.h"
-#include "paddle/fluid/framework/ir/graph_helper.h"
-#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
 #include "paddle/fluid/framework/program_desc.h"
+#include "paddle/fluid/framework/scope.h"
 #include "paddle/phi/common/backend.h"
 #include "paddle/phi/common/data_type.h"
@@ -30,20 +28,52 @@ namespace paddle {
 namespace inference {
 namespace analysis {
+class ConvertToMixedPrecisionPass {
+ public:
+  explicit ConvertToMixedPrecisionPass(
+      const std::string& model_file,
+      const std::string& params_file,
+      const std::string& mixed_model_file,
+      const std::string& mixed_params_file,
+      phi::DataType mixed_precision,
+      phi::Backend backend,
+      bool keep_io_types,
+      const std::unordered_set<std::string>& black_list);
+  void Run();
+ private:
+  void LoadModel();
+  void SaveMixedModel();
+ private:
+  std::string model_file_;
+  std::string params_file_;
+  std::string mixed_model_file_;
+  std::string mixed_params_file_;
+  phi::DataType mixed_precision_;
+  phi::Backend backend_;
+  bool keep_io_types_;
+  std::unordered_set<std::string> black_list_;
+  framework::Scope scope_;
+  std::unique_ptr<framework::ir::Graph> main_graph_{nullptr};
+};
 bool OpSupportPrecision(const std::string& op_type,
                         phi::Backend backend,
                         phi::DataType precision,
-                        const std::unordered_set<std::string>& blacklist);
+                        const std::unordered_set<std::string>& black_list);
-void AddCastOp(
-    framework::ir::Graph* graph,
-    framework::ir::Node* node,
-    framework::ir::Node* next_op,
-    framework::proto::VarType::Type from_type,
-    framework::proto::VarType::Type to_type,
-    int* suffix,
-    framework::BlockDesc* block_desc,
-    std::unordered_map<framework::ir::Node*, framework::ir::Node*>* map);
+void InsertCastOp(
+    framework::ir::Graph* graph,
+    framework::ir::Node* var_node,
+    framework::ir::Node* op_node,
+    framework::proto::VarType::Type from_type,
+    framework::proto::VarType::Type to_type,
+    framework::BlockDesc* block_desc,
+    int* suffix,
+    std::unordered_map<framework::ir::Node*, framework::ir::Node*>* visited);
 void ConvertToMixedPrecision(const std::string& model_file,
                              const std::string& params_file,
@@ -51,8 +81,8 @@ void ConvertToMixedPrecision(const std::string& model_file,
                              const std::string& mixed_params_file,
                              phi::DataType mixed_precision,
                              phi::Backend backend,
-                             bool keep_io_types = true,
-                             std::unordered_set<std::string> black_list = {});
+                             bool keep_io_types,
+                             const std::unordered_set<std::string>& black_list);
 }  // namespace analysis
 }  // namespace inference
......
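With the tightened signature above (the defaults for keep_io_types and black_list are gone), an offline conversion call now spells both arguments out. A sketch, with placeholder file paths:

paddle::inference::analysis::ConvertToMixedPrecision(
    "fp32.pdmodel", "fp32.pdiparams",    // source model (placeholder paths)
    "mixed.pdmodel", "mixed.pdiparams",  // where to write the converted model
    phi::DataType::FLOAT16,              // target low precision
    phi::Backend::GPU,                   // target backend
    true,                                // keep_io_types: keep FP32 model I/O
    {});                                 // black_list: ops forced to stay FP32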
@@ -40,7 +40,7 @@ void InferenceOpReplacePass::RunImpl(Argument* argument) {
 }
 std::string InferenceOpReplacePass::repr() const {
-  return "inference-op-replace-pass";
+  return "inference_op_replace_pass";
 }
 }  // namespace analysis
......
@@ -105,7 +105,7 @@ void IrAnalysisPass::CollectFusionStatis(Argument* argument) {
                        framework::ir::kFuseStatisAttr));
 }
-std::string IrAnalysisPass::repr() const { return "ir-analysis-pass"; }
+std::string IrAnalysisPass::repr() const { return "ir_analysis_pass"; }
 }  // namespace analysis
 }  // namespace inference
......
@@ -64,7 +64,8 @@ void IrGraphBuildPass::RunImpl(Argument *argument) {
         "set."));
   }
-  auto graph = std::unique_ptr<Graph>(new Graph(argument->main_program()));
+  auto graph = std::unique_ptr<framework::ir::Graph>(
+      new framework::ir::Graph(argument->main_program()));
   argument->SetMainGraph(graph.release());
   auto *scope_ptr = argument->scope_ptr();
   PADDLE_ENFORCE_NOT_NULL(scope_ptr,
@@ -125,7 +126,7 @@ std::unique_ptr<framework::ProgramDesc> IrGraphBuildPass::LoadModel(
   }
 }
-std::string IrGraphBuildPass::repr() const { return "ir-graph-build-pass"; }
+std::string IrGraphBuildPass::repr() const { return "ir_graph_build_pass"; }
 }  // namespace analysis
 }  // namespace inference
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/analysis/passes/ir_graph_clean_pass.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/ir/node.h"
namespace paddle {
namespace inference {
namespace analysis {
void IrInferCleanGraphPass::RunImpl(Argument* argument) {
auto& graph = argument->main_graph();
auto is_valid_node = [](framework::ir::Node* x) {
return x && IsControlDepVar(*x) && x->IsVar() && !x->Var();
};
std::unordered_set<const framework::ir::Node*> invalid_nodes;
int valid_op = 0;
for (auto* node : graph.Nodes()) {
PADDLE_ENFORCE_NOT_NULL(node,
platform::errors::PreconditionNotMet(
"The node should not be nullptr."));
if (is_valid_node(node)) {
invalid_nodes.insert(node);
} else if (node->IsOp()) {
++valid_op;
}
}
GraphSafeRemoveNodes(&graph, invalid_nodes);
}
} // namespace analysis
} // namespace inference
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <unordered_set>
#include "paddle/fluid/inference/analysis/analysis_pass.h"
namespace paddle {
namespace inference {
namespace analysis {
struct Argument;
class IrInferCleanGraphPass : public AnalysisPass {
public:
void RunImpl(Argument *argument) override;
std::string repr() const override { return "ir_graph_clean_pass"; }
};
} // namespace analysis
} // namespace inference
} // namespace paddle
@@ -31,7 +31,7 @@ void IrGraphToProgramPass::RunImpl(Argument *argument) {
             new int(argument->memory_optim_sort_kind()));
   }
-  std::unique_ptr<Graph> graph(argument->main_graph_ptr());
+  std::unique_ptr<framework::ir::Graph> graph(argument->main_graph_ptr());
   // Direct using ProgramDesc desc(argument->main_program()) may cause
   // incomplete copies of information.
......
@@ -28,7 +28,7 @@ class IrGraphToProgramPass : public AnalysisPass {
  public:
   void RunImpl(Argument *argument) override;
-  std::string repr() const override { return "ir-graph-to-param-pass"; }
+  std::string repr() const override { return "ir_graph_to_param_pass"; }
 };
 }  // namespace analysis
......
@@ -169,7 +169,7 @@ void IrParamsSyncAmongDevicesPass::RunImpl(Argument *argument) {
 }
 std::string IrParamsSyncAmongDevicesPass::repr() const {
-  return "ir-params-sync-among-devices-pass";
+  return "ir_params_sync_among_devices_pass";
 }
 }  // namespace analysis
......
@@ -295,7 +295,7 @@ void UpdateOpDescsByReuse(
   }
 }
-std::string MemoryOptimizePass::repr() const { return "memory optimize pass"; }
+std::string MemoryOptimizePass::repr() const { return "memory_optimize_pass"; }
 void MemoryOptimizePass::RunImpl(Argument* argument) {
   // Memory optimization.
......
@@ -18,7 +18,6 @@
 #include "paddle/fluid/inference/analysis/passes/inference_op_replace_pass.h"
 #include "paddle/fluid/inference/analysis/passes/ir_analysis_pass.h"
 #include "paddle/fluid/inference/analysis/passes/ir_graph_build_pass.h"
-#include "paddle/fluid/inference/analysis/passes/ir_graph_clean_pass.h"
 #include "paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.h"
 #include "paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h"
 #include "paddle/fluid/inference/analysis/passes/memory_optimize_pass.h"
@@ -34,8 +33,6 @@ PassRegistry::PassRegistry() {
                   std::unique_ptr<AnalysisPass>(new IrAnalysisPass));
   passes_.emplace("ir_graph_build_pass",
                   std::unique_ptr<AnalysisPass>(new IrGraphBuildPass));
-  passes_.emplace("ir_graph_clean_pass",
-                  std::unique_ptr<AnalysisPass>(new IrInferCleanGraphPass));
   passes_.emplace("memory_optimize_pass",
                   std::unique_ptr<AnalysisPass>(new MemoryOptimizePass));
   passes_.emplace(
......
@@ -85,15 +85,29 @@ void AnalysisConfig::SetModel(const std::string &prog_file_path,
   Update();
 }
 void AnalysisConfig::EnableUseGpu(uint64_t memory_pool_init_size_mb,
-                                  int device_id) {
+                                  int device_id,
+                                  Precision precision_mode) {
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
   use_gpu_ = true;
   memory_pool_init_size_mb_ = memory_pool_init_size_mb;
   FLAGS_initial_gpu_memory_in_mb = memory_pool_init_size_mb_;
   gpu_device_id_ = device_id;
+  mixed_precision_mode_ = precision_mode;
+  if (precision_mode == Precision::kFloat32) {
+    // default
+  } else if (precision_mode == Precision::kHalf ||
+             precision_mode == Precision::kBf16) {
+    enable_gpu_mixed_ = true;
+  } else {
+    LOG(ERROR)
+        << "The Paddle-GPU inference currently only supports "
+           "float32/float16/bfloat16 precision. Please check the parameters "
+           "you specified in EnableUseGpu or enable_use_gpu function.";
+  }
 #else
-  LOG(ERROR) << "Please compile with gpu to EnableGpu()";
+  LOG(ERROR) << "Please use PaddlePaddle with GPU version.";
   use_gpu_ = false;
 #endif
@@ -279,7 +293,7 @@ void AnalysisConfig::LoadIpuConfig(const std::string &config_path) {
   if (ipu_config_mapper_.find(key) == ipu_config_mapper_.end()) {
     PADDLE_THROW(platform::errors::InvalidArgument(
-        "invalid key {} in IPU config", key));
+        "invalid key %s in IPU config: ", key));
   }
   switch (ipu_config_mapper_.at(key)) {
     case ipu_config_code::ipu_device_num:
@@ -315,7 +329,7 @@ void AnalysisConfig::LoadIpuConfig(const std::string &config_path) {
     default:
       PADDLE_THROW(platform::errors::InvalidArgument(
-          "invalid key {} in IPU config", key));
+          "invalid key %s in IPU config", key));
       break;
   }
 }
@@ -372,8 +386,10 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   CP_MEMBER(gpu_device_id_);
   CP_MEMBER(memory_pool_init_size_mb_);
-  // Mixed related.
+  // Mixed precision related.
   CP_MEMBER(mixed_black_list_);
+  CP_MEMBER(enable_gpu_mixed_);
+  CP_MEMBER(mixed_precision_mode_);
   CP_MEMBER(enable_memory_optim_);
   // TensorRT related.
@@ -740,13 +756,7 @@ void AnalysisConfig::Update() {
       ((use_custom_device() ^ pass_builder_->use_custom_device()))) {
     if (use_gpu()) {
       pass_builder_.reset(new GpuPassStrategy);
-      if (use_tensorrt_) {
-        // Append after the Affine_channel_conv_fuse pass.
-        pass_builder()->InsertPass(3, "tensorrt_subgraph_pass");
-      }
     } else if (use_ipu()) {
-      VLOG(1) << "IpuPassStrategy has been used for new.";
       pass_builder_.reset(new IpuPassStrategy);
     } else if (use_xpu()) {
       PADDLE_ENFORCE_EQ(
@@ -946,9 +956,6 @@ void AnalysisConfig::Update() {
             "but did not have the option -DWITH_CUSTOM_DEVICE compiled."));
 #endif
   }
-  if (ir_debug_) {
-    pass_builder()->TurnOnDebug();
-  }
 }
 std::string AnalysisConfig::SerializeInfoCache() {
@@ -960,6 +967,7 @@ std::string AnalysisConfig::SerializeInfoCache() {
   ss << calibration_file_path_;
   ss << use_gpu_;
+  ss << enable_gpu_mixed_;
   ss << use_external_stream_;
   ss << exec_stream_;
   ss << use_fc_padding_;
@@ -1167,6 +1175,7 @@ std::string AnalysisConfig::Summary() {
   os.InsertRow({"use_gpu", use_gpu_ ? "true" : "false"});
   if (use_gpu_) {
     os.InsertRow({"gpu_device_id", std::to_string(gpu_device_id_)});
+    os.InsertRow({"enable_gpu_mixed", std::to_string(enable_gpu_mixed_)});
     os.InsertRow({"memory_pool_init_size",
                   std::to_string(memory_pool_init_size_mb_) + "MB"});
     os.InsertRow(
@@ -1360,7 +1369,7 @@ bool AnalysisConfig::trt_allow_build_at_runtime() {
   return trt_allow_build_at_runtime_;
 }
-void AnalysisConfig::Exp_SetBlackListOpsForMixedModel(
+void AnalysisConfig::Exp_DisableMixedPrecisionOps(
     const std::unordered_set<std::string> &black_list) {
   mixed_black_list_ = black_list;
 }
......
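The renamed blacklist API pairs with the new mixed precision switch; a brief sketch (the op names here are illustrative, not from this PR):

paddle::AnalysisConfig config;
config.EnableUseGpu(512, 0, paddle_infer::PrecisionType::kHalf);
// Formerly Exp_SetBlackListOpsForMixedModel; the listed ops are kept in
// FP32 while the rest of the graph runs at low precision.
config.Exp_DisableMixedPrecisionOps({"softmax", "layer_norm"});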
@@ -1065,7 +1065,7 @@ void AnalysisPredictor::PrepareArgument() {
   argument_.SetUseGPU(config_.use_gpu());
   argument_.SetUseFcPadding(config_.use_fc_padding());
   argument_.SetGPUDeviceId(config_.gpu_device_id());
-  argument_.SetEnableAnalysisOptim(config_.enable_ir_optim_);
+  argument_.SetEnableIrOptim(config_.enable_ir_optim_);
   argument_.SetEnableMemoryOptim(config_.enable_memory_optim());
   argument_.SetModelFromMemory(config_.model_from_memory_);
   // Analyze inference_program
@@ -1210,53 +1210,57 @@ void AnalysisPredictor::PrepareArgument() {
   }
 #endif
-  auto passes = config_.pass_builder()->AllPasses();
+  auto *pass_builder = config_.pass_builder();
   if (model_precision_ != phi::DataType::FLOAT32) {
     LOG(INFO) << "Model is mixed precision type with " << model_precision_
               << ", we will use a new PassStrategy. Note that only the GPU "
                  "backend is supported for now.";
-    passes.clear();
+    pass_builder->ClearPasses();
+    const auto &deleted_passes = pass_builder->GetAllDeletedPasses();
     if (config_.tensorrt_engine_enabled()) {
       for (const auto &pass : kTrtLowerPrecisionPasses) {
-        passes.push_back(pass);
+        if (deleted_passes.count(pass)) continue;
+        pass_builder->AppendPass(pass);
       }
     } else if (config_.use_gpu()) {
       for (const auto &pass : kGpuLowerPrecisionPasses) {
-        passes.push_back(pass);
+        if (deleted_passes.count(pass)) continue;
+        pass_builder->AppendPass(pass);
       }
     }
   }
-  const auto &deleted_passes = config_.pass_builder()->GetAllDeletedPasses();
-  for (const auto &it : deleted_passes) {
-    auto iterator = std::find(passes.begin(), passes.end(), it);
-    if (iterator != passes.end()) {
-      passes.erase(iterator);
-    }
-  }
-  if (config_.ir_debug_) {
-    auto it = std::begin(passes);
-    while (it != std::end(passes)) {
-      if (*it != "graph_viz_pass") {
-        it = passes.insert(it + 1, "graph_viz_pass");
-      } else {
-        ++it;
-      }
-    }
-  }
-  if (!config_.ir_optim()) {
-    passes.clear();
-    LOG(INFO) << "ir_optim is turned off, no IR pass will be executed";
+  if (!config_.ir_optim()) {
+    argument_.SetEnableIrOptim(false);
+    if (config_.enable_gpu_mixed_) {
+      argument_.SetEnableIrOptim(true);
+      pass_builder->ClearPasses();
+      pass_builder->AppendPass("auto_mixed_precision_pass");
+      LOG(INFO)
+          << "This model run in Paddle-GPU mixed precision mode with no ir "
+             "optimization.";
+    } else {
+      LOG(INFO) << "ir_optim is turned off, no IR pass will be executed.";
+    }
+  } else {
+    if (config_.ir_debug_) {
+      pass_builder->TurnOnDebug();
+    }
+    if (config_.enable_gpu_mixed_) {
+      LOG(INFO) << "This model run in Paddle-GPU mixed precision mode.";
+    }
   }
   argument_.SetDisableLogs(config_.glog_info_disabled());
-  argument_.SetIrAnalysisPasses(passes);
-  argument_.SetAnalysisPasses(config_.pass_builder()->AnalysisPasses());
+  argument_.SetIrAnalysisPasses(pass_builder->AllPasses());
+  argument_.SetAnalysisPasses(pass_builder->AnalysisPasses());
   argument_.SetScopeNotOwned(scope_.get());
   // mixed precision.
   argument_.SetModelPrecision(static_cast<int>(model_precision_));
   argument_.SetMixedBlackList(config_.mixed_black_list_);
+  argument_.SetEnableGPUMixed(config_.enable_gpu_mixed_);
+  argument_.SetMixedPrecisionMode(static_cast<int>(
+      paddle::ConvertPrecision(config_.mixed_precision_mode_)));
 }
 // NOTE All the members in AnalysisConfig should be copied to Argument.
@@ -2107,7 +2111,9 @@ std::unique_ptr<PaddlePredictor> AnalysisPredictor::Clone(void *stream) {
   }
   x->predictor_stream_ = stream;
   x->Init(scope_, inference_program_);
+#ifdef PADDLE_WITH_TENSORRT
   x->executor_->ResetTrtOps(++AnalysisPredictor::clone_num_);
+#endif
   return std::unique_ptr<PaddlePredictor>(x);
 }
......
@@ -604,10 +604,8 @@ void AnalysisPredictor::MkldnnQuantizer::PrepareArgument() const {
   if (predictor_.config_.ir_debug_) builder->TurnOnDebug();
   auto passes = builder->AllPasses();
   predictor_.argument_.SetIrAnalysisPasses(passes);
-  predictor_.argument_.SetAnalysisPasses({"ir_graph_clean_pass",
-                                          "ir_analysis_pass",
-                                          "memory_optimize_pass",
-                                          "ir_graph_to_program_pass"});
+  predictor_.argument_.SetAnalysisPasses(
+      {"ir_analysis_pass", "memory_optimize_pass", "ir_graph_to_program_pass"});
   predictor_.argument_.SetQuantVarScales(scales_);
 }
......
@@ -247,8 +247,12 @@ struct PD_INFER_DECL AnalysisConfig {
   ///
   /// \param memory_pool_init_size_mb initial size of the GPU memory pool in MB.
   /// \param device_id device_id the GPU card to use (default is 0).
+  /// \param precision the precision used in Paddle-GPU inference.
   ///
-  void EnableUseGpu(uint64_t memory_pool_init_size_mb, int device_id = 0);
+  void EnableUseGpu(uint64_t memory_pool_init_size_mb,
+                    int device_id = 0,
+                    Precision precision_mode = Precision::kFloat32);
   ///
   /// \brief Turn off GPU.
   ///
@@ -967,7 +971,7 @@ struct PD_INFER_DECL AnalysisConfig {
   /// interface is in the experimental stage and may change in the future. Note
   /// that the blacklist must be the same as the model conversion blacklist.
   ///
-  void Exp_SetBlackListOpsForMixedModel(
+  void Exp_DisableMixedPrecisionOps(
       const std::unordered_set<std::string>& black_list);
   void SetApplyOptim(bool value) { apply_optim_ = value; }
@@ -987,13 +991,15 @@ struct PD_INFER_DECL AnalysisConfig {
   mutable std::string params_file_;
   mutable std::string calibration_file_path_;
-  // Mixed precision.
+  // Mixed precision related.
+  Precision mixed_precision_mode_{Precision::kFloat32};
   std::unordered_set<std::string> mixed_black_list_;
   // GPU related.
   bool use_gpu_{false};
   int gpu_device_id_{0};
   uint64_t memory_pool_init_size_mb_{100};  // initial size is 100MB.
+  bool enable_gpu_mixed_{false};
   bool thread_local_stream_{false};
   bool use_cudnn_{false};
......
@@ -227,9 +227,10 @@ GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
         "conv_elementwise_add_fuse_pass",      //
 #endif                                         //
         "transpose_flatten_concat_fuse_pass",  //
-        "constant_folding_pass",
+        "constant_folding_pass",               //
         // following pass should be located in the last, since it will
         // work on all fused ops.
+        "auto_mixed_precision_pass",           //
         "runtime_context_cache_pass"
   });
......
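The ordering above matters at the user level too: individual passes can still be dropped from the strategy before prediction, as the new half-precision test near the end of this diff does. A brief sketch:

paddle::AnalysisConfig config;
config.EnableUseGpu(512, 0, paddle_infer::PrecisionType::kHalf);
// Remove a single IR pass from the current strategy (gpu_ernie_half_test.cc
// below uses this to work around a known fc_fuse_pass accuracy diff).
config.pass_builder()->DeletePass("fc_fuse_pass");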
@@ -115,7 +115,6 @@ class PD_INFER_DECL PaddlePassBuilder {
   /// \cond Protected
   std::vector<std::string> analysis_passes_{
       {"ir_graph_build_pass",
-       "ir_graph_clean_pass",
        "ir_analysis_pass",
        "ir_params_sync_among_devices_pass",
        "adjust_cudnn_workspace_size_pass",
......
@@ -294,15 +294,6 @@ class TensorRTEngine {
   nvinfer1::ICudaEngine* engine() { return infer_engine_.get(); }
   nvinfer1::IExecutionContext* context() {
-#ifndef PADDLE_WITH_TESTING
-    PADDLE_ENFORCE_GT(
-        predictor_id_per_thread,
-        -1,
-        platform::errors::InvalidArgument(
-            "thread local var predictor_id_per_thread must be "
-            "initialized to >= 0, but now predictor_id_per_thread = %d",
-            predictor_id_per_thread));
-#endif
     std::unique_lock<std::mutex> lock(mutex_);
     if (infer_context_.find(predictor_id_per_thread) == infer_context_.end()) {
       PADDLE_ENFORCE_NOT_NULL(
@@ -329,15 +320,6 @@ class TensorRTEngine {
   int GetProfileIndex() {
     if (max_profile_num_ > 1) {
-#ifndef PADDLE_WITH_TESTING
-      PADDLE_ENFORCE_GT(
-          predictor_id_per_thread,
-          -1,
-          platform::errors::InvalidArgument(
-              "thread local var predictor_id_per_thread must be "
-              "initialized to >= 0, but now predictor_id_per_thread = %d",
-              predictor_id_per_thread));
-#endif
       std::unique_lock<std::mutex> lock(mutex_);
       return profile_index_[predictor_id_per_thread];
     } else {
@@ -356,15 +338,6 @@ class TensorRTEngine {
         infer_engine_,
         platform::errors::InvalidArgument(
             "You should build engine first and then set the context."));
-#ifndef PADDLE_WITH_TESTING
-    PADDLE_ENFORCE_GT(
-        predictor_id_per_thread,
-        -1,
-        platform::errors::InvalidArgument(
-            "thread local var predictor_id_per_thread must be "
-            "initialized to >= 0, but now predictor_id_per_thread = %d",
-            predictor_id_per_thread));
-#endif
     std::unique_lock<std::mutex> lock(mutex_);
     infer_context_[predictor_id_per_thread].reset(nullptr);
     infer_context_.erase(predictor_id_per_thread);
......
@@ -416,6 +416,9 @@ download_result(${ERNIE_INSTALL_DIR} "Ernie_result.txt.tar.gz"
 if(WITH_GPU)
   inference_analysis_api_test(test_analyzer_ernie ${ERNIE_INSTALL_DIR}
                               analyzer_ernie_tester.cc)
+  inference_analysis_api_test(gpu_ernie_half_test ${ERNIE_INSTALL_DIR}
+                              gpu_ernie_half_test.cc)
+  set_tests_properties(gpu_ernie_half_test PROPERTIES TIMEOUT 60)
 endif()
 inference_analysis_api_int8_test(test_analyzer_ernie_int8 ${ERNIE_INSTALL_DIR}
                                  analyzer_ernie_int8_tester.cc)
......
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/tests/api/tester_helper.h"
namespace paddle {
namespace inference {
using paddle::PaddleTensor;
template <typename T>
void GetValueFromStream(std::stringstream *ss, T *t) {
(*ss) >> (*t);
}
template <>
void GetValueFromStream<std::string>(std::stringstream *ss, std::string *t) {
*t = ss->str();
}
// Split string to vector
template <typename T>
void Split(const std::string &line, char sep, std::vector<T> *v) {
std::stringstream ss;
T t;
for (auto c : line) {
if (c != sep) {
ss << c;
} else {
GetValueFromStream<T>(&ss, &t);
v->push_back(std::move(t));
ss.str({});
ss.clear();
}
}
if (!ss.str().empty()) {
GetValueFromStream<T>(&ss, &t);
v->push_back(std::move(t));
ss.str({});
ss.clear();
}
}
// Parse tensor from string
template <typename T>
bool ParseTensor(const std::string &field, paddle::PaddleTensor *tensor) {
std::vector<std::string> data;
Split(field, ':', &data);
if (data.size() < 2) return false;
std::string shape_str = data[0];
std::vector<int> shape;
Split(shape_str, ' ', &shape);
std::string mat_str = data[1];
std::vector<T> mat;
Split(mat_str, ' ', &mat);
tensor->shape = shape;
auto size =
std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>()) *
sizeof(T);
tensor->data.Resize(size);
std::copy(mat.begin(), mat.end(), static_cast<T *>(tensor->data.data()));
tensor->dtype = GetPaddleDType<T>();
return true;
}
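// Illustration of the field layout the parser above expects,
// "<space-separated shape>:<space-separated data>":
//   paddle::PaddleTensor t;
//   ParseTensor<float>(std::string("1 2:0.5 0.25"), &t);
//   // t.shape == {1, 2}; t.data holds {0.5f, 0.25f}; t.dtype is FLOAT32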
// Parse input tensors from string
bool ParseLine(const std::string &line,
std::vector<paddle::PaddleTensor> *tensors) {
std::vector<std::string> fields;
Split(line, ';', &fields);
tensors->clear();
tensors->reserve(4);
int i = 0;
auto input_name = FLAGS_ernie_large ? "eval_placeholder_" : "placeholder_";
for (; i < 3; i++) {
paddle::PaddleTensor temp;
ParseTensor<int64_t>(fields[i], &temp);
temp.name = input_name + std::to_string(i);
tensors->push_back(temp);
}
// input_mask
paddle::PaddleTensor input_mask;
ParseTensor<float>(fields[i], &input_mask);
input_mask.name = input_name + std::to_string(i);
tensors->push_back(input_mask);
return true;
}
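// Expected line layout (derived from the parser above): four ';'-separated
// fields -- three int64 id tensors followed by one float input mask -- named
// placeholder_0 .. placeholder_3 (or eval_placeholder_* for the large model).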
bool LoadInputData(std::vector<std::vector<paddle::PaddleTensor>> *inputs,
int batch_size = 1) {
if (FLAGS_infer_data.empty()) {
LOG(ERROR) << "please set input data path";
return false;
}
std::ifstream fin(FLAGS_infer_data);
std::string line;
int sample = 0;
  // The unit-test dataset only has 10 samples; each sample has 5 feeds.
while (std::getline(fin, line)) {
std::vector<paddle::PaddleTensor> feed_data;
ParseLine(line, &feed_data);
inputs->push_back(std::move(feed_data));
sample++;
if (!FLAGS_test_all_data && sample == batch_size) break;
}
LOG(INFO) << "number of samples: " << sample;
return true;
}
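// Typical invocation (flags come from tester_helper.h; paths are illustrative):
//   ./gpu_ernie_half_test --infer_model=<model_dir> --infer_data=<data.txt> \
//       --refer_result=<result.txt>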
// Compare results
TEST(Ernie_gpu_fp16_no_ir, compare_results) {
AnalysisConfig config;
config.SetModel(FLAGS_infer_model);
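  // 512 MB initial GPU memory pool, device 0, executing in float16.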
config.EnableUseGpu(512, 0, paddle_infer::PrecisionType::kHalf);
config.SwitchIrOptim(false);
auto predictor = CreatePaddlePredictor(config);
std::vector<std::vector<PaddleTensor>> input_slots_all;
LoadInputData(&input_slots_all);
std::ifstream fin(FLAGS_refer_result);
std::string line;
std::vector<float> ref;
while (std::getline(fin, line)) {
Split(line, ' ', &ref);
}
std::vector<PaddleTensor> outputs;
for (size_t i = 0; i < input_slots_all.size(); i++) {
outputs.clear();
predictor->Run(input_slots_all[i], &outputs);
auto output = outputs.front();
size_t outputs_size = 1;
for (auto dim : output.shape) {
outputs_size *= dim;
}
float *result = reinterpret_cast<float *>(output.data.data());
for (size_t j = 0; j < outputs_size; ++j) {
EXPECT_NEAR(ref[i * outputs_size + j], result[j], 5e-2);
}
}
}
// Compare results
TEST(Ernie_gpu_fp16_with_ir, compare_results) {
AnalysisConfig config;
config.SetModel(FLAGS_infer_model);
config.EnableUseGpu(512, 0, paddle_infer::PrecisionType::kHalf);
config.SwitchIrOptim(true);
  // The fc_fuse_pass introduces numerical diffs; this will be fixed later.
config.pass_builder()->DeletePass("fc_fuse_pass");
// There is a problem with the model itself, which has nothing to do with
// constant_folding_pass.
config.pass_builder()->DeletePass("constant_folding_pass");
auto predictor = CreatePaddlePredictor(config);
std::vector<std::vector<PaddleTensor>> input_slots_all;
LoadInputData(&input_slots_all);
std::ifstream fin(FLAGS_refer_result);
std::string line;
std::vector<float> ref;
while (std::getline(fin, line)) {
Split(line, ' ', &ref);
}
std::vector<PaddleTensor> outputs;
for (size_t i = 0; i < input_slots_all.size(); i++) {
outputs.clear();
predictor->Run(input_slots_all[i], &outputs);
auto output = outputs.front();
size_t outputs_size = 1;
for (auto dim : output.shape) {
outputs_size *= dim;
}
float *result = reinterpret_cast<float *>(output.data.data());
for (size_t j = 0; j < outputs_size; ++j) {
EXPECT_NEAR(ref[i * outputs_size + j], result[j], 5e-2);
}
}
}
// Compare results
TEST(Ernie_gpu_bf16_no_ir, compare_results) {
AnalysisConfig config;
config.SetModel(FLAGS_infer_model);
config.EnableUseGpu(512, 0, paddle_infer::PrecisionType::kBf16);
config.SwitchIrOptim(false);
auto predictor = CreatePaddlePredictor(config);
std::vector<std::vector<PaddleTensor>> input_slots_all;
LoadInputData(&input_slots_all);
std::ifstream fin(FLAGS_refer_result);
std::string line;
std::vector<float> ref;
while (std::getline(fin, line)) {
Split(line, ' ', &ref);
}
std::vector<PaddleTensor> outputs;
for (size_t i = 0; i < input_slots_all.size(); i++) {
outputs.clear();
predictor->Run(input_slots_all[i], &outputs);
auto output = outputs.front();
size_t outputs_size = 1;
for (auto dim : output.shape) {
outputs_size *= dim;
}
float *result = reinterpret_cast<float *>(output.data.data());
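    // bfloat16 keeps fewer mantissa bits than float16, hence the looser
    // 7e-2 tolerance compared with the fp16 tests above.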
for (size_t j = 0; j < outputs_size; ++j) {
EXPECT_NEAR(ref[i * outputs_size + j], result[j], 7e-2);
}
}
}
// Compare results
TEST(Ernie_gpu_bf16_with_ir, compare_results) {
AnalysisConfig config;
config.SetModel(FLAGS_infer_model);
config.EnableUseGpu(512, 0, paddle_infer::PrecisionType::kBf16);
config.SwitchIrOptim(true);
  // The fc_fuse_pass introduces numerical diffs; this will be fixed later.
config.pass_builder()->DeletePass("fc_fuse_pass");
// There is a problem with the model itself, which has nothing to do with
// constant_folding_pass.
config.pass_builder()->DeletePass("constant_folding_pass");
auto predictor = CreatePaddlePredictor(config);
std::vector<std::vector<PaddleTensor>> input_slots_all;
LoadInputData(&input_slots_all);
std::ifstream fin(FLAGS_refer_result);
std::string line;
std::vector<float> ref;
while (std::getline(fin, line)) {
Split(line, ' ', &ref);
}
std::vector<PaddleTensor> outputs;
for (size_t i = 0; i < input_slots_all.size(); i++) {
outputs.clear();
predictor->Run(input_slots_all[i], &outputs);
auto output = outputs.front();
size_t outputs_size = 1;
for (auto dim : output.shape) {
outputs_size *= dim;
}
float *result = reinterpret_cast<float *>(output.data.data());
for (size_t j = 0; j < outputs_size; ++j) {
EXPECT_NEAR(ref[i * outputs_size + j], result[j], 7e-2);
}
}
}
} // namespace inference
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -12,15 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <cuda_runtime.h>
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <cstring>
#include <numeric>
#include "gflags/gflags.h"
#include "paddle/fluid/inference/tests/api/trt_test_helper.h"
#include "paddle/fluid/inference/tests/api/tester_helper.h"
namespace paddle_infer {
......
@@ -262,10 +262,6 @@ if(WITH_PYTHON)
    list(APPEND OP_FUNCTION_GENERETOR_DEPS cncl_context)
  endif()
if(NOT ((NOT WITH_PYTHON) AND ON_INFER))
list(APPEND OP_FUNCTION_GENERETOR_DEPS ${PYTHON_LIBRARIES})
endif()
  add_executable(op_function_generator op_function_generator.cc)
  target_link_libraries(op_function_generator ${OP_FUNCTION_GENERETOR_DEPS})
  add_executable(eager_legacy_op_function_generator
@@ -605,13 +601,4 @@ if(WITH_PYTHON)
    get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
    target_link_libraries(${SHARD_LIB_NAME} ${os_dependency_modules})
    add_dependencies(${SHARD_LIB_NAME} op_function_generator_cmd)
if(APPLE)
string(REGEX REPLACE ".+/(.+)" "\\1" PYTHON_LIBRARY_NAME
${PYTHON_LIBRARIES})
# target_link_libraries(${SHARD_LIB_NAME} "-Wl,-rpath,${PYTHON_LIBRARY_NAME}")
else()
target_link_libraries(${SHARD_LIB_NAME} ${PYTHON_LIBRARIES})
endif()
endif()
@@ -642,7 +642,8 @@ void BindAnalysisConfig(py::module *m) {
.def("enable_use_gpu", .def("enable_use_gpu",
&AnalysisConfig::EnableUseGpu, &AnalysisConfig::EnableUseGpu,
py::arg("memory_pool_init_size_mb"), py::arg("memory_pool_init_size_mb"),
py::arg("device_id") = 0) py::arg("device_id") = 0,
py::arg("precision_mode") = AnalysisConfig::Precision::kFloat32)
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
      .def("set_exec_stream",
           [](AnalysisConfig &self, phi::CUDAStream &stream) {
......