Unverified · Commit 88b52a27 authored by Zhaolong Xing, committed by GitHub

Inference: fix Mask R-CNN model diff, optimize memory usage, fix a memory leak. (#18532)

* Fix the Mask R-CNN predictor:
    1. Refine the memory optimization algorithm to support models that contain block ops.
    2. Fix the output diff by tightening the affine_channel fuse pattern.
    3. Add the conditional_block_infer op.
Add an interface for setting the TRT calibration table (optimization cache) directory.
test=develop

* add the missing files.
test=develop
Parent commit: 15291548
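For reference, a minimal sketch of how the new cache-directory interface is meant to be used from the C++ inference API. Only `SetOptimCacheDir`, `SetModelBuffer`, and the `EnableTensorRTEngine` signature are touched by this change; the GPU setup, the buffer variables, the cache path, and the `CreatePaddlePredictor` call are ordinary inference-API usage shown here for illustration only.

```cpp
#include <memory>
#include <string>

#include "paddle/fluid/inference/api/paddle_inference_api.h"

// Illustrative only: prog_data / param_data are assumed to already hold the
// serialized program and parameters read into memory.
std::unique_ptr<paddle::PaddlePredictor> BuildPredictor(
    const std::string &prog_data, const std::string &param_data) {
  paddle::AnalysisConfig config;
  config.EnableUseGpu(100 /* initial GPU memory in MB */, 0 /* device id */);
  // Load the model from memory rather than from model_dir / prog_file.
  config.SetModelBuffer(prog_data.data(), prog_data.size(),
                        param_data.data(), param_data.size());
  // New in this change: when the model comes from memory and TRT INT8 (or a
  // static engine) is used, an optimization cache dir must be set so the
  // calibration table / serialized engine has somewhere to be stored.
  config.SetOptimCacheDir("./optim_cache");  // path is illustrative
  config.EnableTensorRTEngine(1 << 20 /* workspace_size */,
                              1 /* max_batch_size */, 3 /* min_subgraph_size */,
                              paddle::AnalysisConfig::Precision::kInt8,
                              false /* use_static */, true /* use_calib_mode */);
  return paddle::CreatePaddlePredictor(config);
}
```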
@@ -504,6 +504,16 @@ PDNode *PDNode::assert_op_has_n_outputs(const std::string &op_type, size_t n) {
  return this;
}

+PDNode *PDNode::assert_has_n_inputs(size_t n) {
+  asserts_.emplace_back([=](Node *x) { return x->inputs.size() == n; });
+  return this;
+}
+
+PDNode *PDNode::assert_has_n_outputs(size_t n) {
+  asserts_.emplace_back([=](Node *x) { return x->outputs.size() == n; });
+  return this;
+}
+
PDNode *PDNode::assert_more(PDNode::teller_t &&teller) {
  asserts_.emplace_back(std::move(teller));
  return this;
@@ -1469,11 +1479,13 @@ PDNode *patterns::ConvAffineChannel::operator()(
  auto *ac_scale_var = pattern->NewNode(ac_scale_repr())
                           ->AsInput()
                           ->assert_is_persistable_var()
+                          ->assert_has_n_outputs(1)
                           ->assert_is_op_input("affine_channel", "Scale");
  // AC Bias
  auto *ac_bias_var = pattern->NewNode(ac_bias_repr())
                          ->AsInput()
                          ->assert_is_persistable_var()
+                         ->assert_has_n_outputs(1)
                          ->assert_is_op_input("affine_channel", "Bias");
  // AC output
......
@@ -131,6 +131,9 @@ struct PDNode {
      const std::unordered_set<std::string>& op_types,
      const std::string& argument, int nth);

+  PDNode* assert_has_n_inputs(size_t n);
+  PDNode* assert_has_n_outputs(size_t n);
+
  template <typename T>
  PDNode* assert_op_attr(const std::string& attr_name, const T& attr) {
    asserts_.emplace_back([=](Node* x) {
......
@@ -59,7 +59,6 @@ struct Argument {
  using unique_ptr_t = std::unique_ptr<void, std::function<void(void*)>>;
  using fusion_statis_t = std::unordered_map<std::string, int>;
-  using engine_opt_info_t = std::map<std::string, std::string>;
  using anakin_max_shape_t = std::map<std::string, std::vector<int>>;

  bool Has(const std::string& key) const { return valid_fields_.count(key); }
@@ -130,7 +129,7 @@ struct Argument {
  DECL_ARGUMENT_FIELD(model_program_path, ModelProgramPath, std::string);
  DECL_ARGUMENT_FIELD(model_params_path, ModelParamsPath, std::string);
  DECL_ARGUMENT_FIELD(model_from_memory, ModelFromMemory, bool);
-  DECL_ARGUMENT_FIELD(engine_opt_info, EngineOptInfo, engine_opt_info_t);
+  DECL_ARGUMENT_FIELD(optim_cache_dir, OptimCacheDir, std::string);

  // The overall graph to work on.
  DECL_ARGUMENT_UNIQUE_FIELD(main_graph, MainGraph, framework::ir::Graph);
......
@@ -94,11 +94,20 @@ void IRPassManager::CreatePasses(Argument *argument,
      bool use_static_engine = argument->tensorrt_use_static_engine();
      bool model_from_memory = argument->model_from_memory();
-     bool int8_valid = !(model_from_memory && enable_int8);
-     PADDLE_ENFORCE(int8_valid,
-                    "TRT INT8 Now don't support model load from memory.");
-     if ((!model_from_memory && use_static_engine) || enable_int8) {
+     std::string optim_cache_dir = argument->optim_cache_dir();
+     bool int8_valid =
+         !(model_from_memory && optim_cache_dir.empty() && enable_int8);
+     PADDLE_ENFORCE(int8_valid,
+                    "When you are in TRT INT8 mode, and load model from "
+                    "memory, you should set optim_cache_dir using "
+                    "config.SetOptimCacheDir()");
+     PADDLE_ENFORCE(!(model_from_memory && use_static_engine),
+                    "When you are using Paddle-TRT, and also using load model "
+                    "from memory, you should set the use_static to false.");
+     if (!optim_cache_dir.empty()) {
+       pass->Set("model_opt_cache_dir", new std::string(optim_cache_dir));
+     } else if (use_static_engine || enable_int8) {
        std::string model_opt_cache_dir =
            argument->Has("model_dir")
                ? argument->model_dir()
@@ -110,8 +119,6 @@ void IRPassManager::CreatePasses(Argument *argument,
      pass->Set("gpu_device_id", new int(argument->gpu_device_id()));
      pass->Set("use_static_engine", new bool(use_static_engine));
      pass->Set("model_from_memory", new bool(argument->model_from_memory()));
-     pass->Set("engine_opt_info", new std::map<std::string, std::string>(
-                                      argument->engine_opt_info()));
    }

    if (pass_name == "ngraph_subgraph_pass") {
      pass->Set("program",
@@ -123,8 +130,6 @@ void IRPassManager::CreatePasses(Argument *argument,
      pass->Set("use_gpu", new bool(argument->use_gpu()));
      pass->Set("gpu_device_id", new int(argument->gpu_device_id()));
      pass->Set("model_from_memory", new bool(argument->model_from_memory()));
-     pass->Set("engine_opt_info", new std::map<std::string, std::string>(
-                                      argument->engine_opt_info()));
      pass->Set("predictor_id", new int(argument->predictor_id()));
      pass->Set("max_input_shape", new std::map<std::string, std::vector<int>>(
                                       argument->anakin_max_input_shape()));
......
@@ -226,6 +226,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
  std::unique_ptr<tensorrt::TRTInt8Calibrator> calibrator;
  if (enable_int8 && calibration_data.size() != 0) {
    calibrator.reset(new tensorrt::TRTInt8Calibrator(calibration_data));
+   LOG(INFO) << "RUN Paddle TRT int8 calibration mode...";
  }
  // When in int8 mode and calibration_mode, the program just produce the
  // calibration table data.
......
@@ -4,6 +4,7 @@ cc_library(memory_optim_pass SRCS memory_optimize_pass.cc DEPS analysis_pass zer
cc_library(ir_params_sync_among_devices_pass SRCS ir_params_sync_among_devices_pass.cc DEPS analysis_pass argument ir_pass_manager)
cc_library(ir_graph_to_program_pass SRCS ir_graph_to_program_pass.cc DEPS analysis_pass graph_to_program_pass)
cc_library(adjust_cudnn_workspace_size_pass SRCS adjust_cudnn_workspace_size_pass.cc DEPS analysis_pass graph_to_program_pass)
+cc_library(inference_op_replace_pass SRCS inference_op_replace_pass.cc DEPS analysis_pass graph_to_program_pass)

cc_library(analysis_passes SRCS passes.cc DEPS
        ir_graph_build_pass
@@ -11,6 +12,7 @@ cc_library(analysis_passes SRCS passes.cc DEPS
        ir_params_sync_among_devices_pass
        adjust_cudnn_workspace_size_pass
        memory_optim_pass
+       inference_op_replace_pass
        ir_graph_to_program_pass
)
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/analysis/passes/inference_op_replace_pass.h"
#include <unordered_map>
namespace paddle {
namespace inference {
namespace analysis {
void InferenceOpReplacePass::RunImpl(Argument* argument) {
if (!argument->use_gpu()) return;
std::unordered_map<std::string, std::string> replaced_map{
{"conditional_block", "conditional_block_infer"},
};
auto& graph = argument->main_graph();
auto nodes = graph.Nodes();
for (auto& node : nodes) {
if (!node->IsOp()) continue;
auto* op_desc = node->Op();
std::string op_type = op_desc->Type();
if (!replaced_map.count(op_type)) continue;
op_desc->SetType(replaced_map[op_type]);
op_desc->Flush();
}
}
std::string InferenceOpReplacePass::repr() const {
return "inference-op-replace-pass";
}
} // namespace analysis
} // namespace inference
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <vector>
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/inference/analysis/analysis_pass.h"
#include "paddle/fluid/platform/place.h"
namespace paddle {
namespace inference {
namespace analysis {
/*
 * Some ops (while, conditional_block_op, etc.) have different optimization
 * requirements under prediction and training conditions, so we add a
 * separate inference implementation for each of them.
 * This pass replaces such ops with their corresponding inference ops.
 */
class InferenceOpReplacePass : public AnalysisPass {
public:
void RunImpl(Argument *argument) override;
std::string repr() const override;
};
} // namespace analysis
} // namespace inference
} // namespace paddle
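As the header comment notes, the actual replacement table lives in `InferenceOpReplacePass::RunImpl` above. A small sketch of that mapping, assuming further inference-only ops would be registered the same way; the `while` → `while_infer` pair is hypothetical and not part of this change, and `kInferenceOpMap` is an illustrative name.

```cpp
#include <string>
#include <unordered_map>

// Sketch of the op -> inference-op table used by the pass. This change only
// registers conditional_block; the commented entry is a hypothetical example.
static const std::unordered_map<std::string, std::string> kInferenceOpMap{
    {"conditional_block", "conditional_block_infer"},
    // {"while", "while_infer"},  // hypothetical future replacement
};
```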
@@ -18,6 +18,7 @@
#include <functional>
#include <limits>
#include <map>
+#include <set>
#include <string>
#include <type_traits>
#include <utility>
@@ -108,11 +109,34 @@ int DataTypeToSpace(framework::proto::VarType_Type type) {
void MemoryOptimizePass::CollectVarMemorySize(
    space_table_t* space_table) const {
  const int fake_batch_size = 1;
+  auto valid_var = [&](framework::ir::Node* node) -> bool {
+    std::set<std::string> invalid_op = {"while", "conditional_block",
+                                        "tensorrt_engine",
+                                        "conditional_block_infer"};
+    for (auto* tmp : node->inputs) {
+      CHECK(tmp->IsOp());
+      std::string op_type = tmp->Op()->Type();
+      if (std::find(invalid_op.begin(), invalid_op.end(), op_type) !=
+          invalid_op.end()) {
+        return false;
+      }
+    }
+    for (auto* tmp : node->outputs) {
+      CHECK(tmp->IsOp());
+      std::string op_type = tmp->Op()->Type();
+      if (std::find(invalid_op.begin(), invalid_op.end(), op_type) !=
+          invalid_op.end()) {
+        return false;
+      }
+    }
+    return true;
+  };
  // Collect tensors from graph.
  for (auto* node : graph_->Nodes()) {
    if (node->IsVar() &&
        node->Var()->GetType() ==
-           framework::proto::VarType::Type::VarType_Type_LOD_TENSOR) {
+           framework::proto::VarType::Type::VarType_Type_LOD_TENSOR &&
+       valid_var(node)) {
      // Parameters will not be reused.
      if (node->Var()->Persistable()) continue;
      auto shape = node->Var()->GetShape();
@@ -135,12 +159,9 @@ void MakeSimpleReusePlan(
    std::unordered_map<std::string, int>* cluster_size) {
  std::vector<MemNode> mem_nodes;
  for (auto& data : lifecycles) {
+   if (!space_table.count(data.first)) continue;
    MemNode temp_node;
    temp_node.name = data.first;
-   PADDLE_ENFORCE(
-       space_table.count(data.first),
-       "%s variable should be in the spacetable during memory optimize",
-       data.first);
    temp_node.size = space_table.at(data.first);
    temp_node.cluster = -1;
    temp_node.lifetime = data.second;
......
@@ -14,6 +14,7 @@
#include "paddle/fluid/inference/analysis/passes/passes.h"
#include "paddle/fluid/inference/analysis/passes/adjust_cudnn_workspace_size_pass.h"
+#include "paddle/fluid/inference/analysis/passes/inference_op_replace_pass.h"
#include "paddle/fluid/inference/analysis/passes/ir_analysis_pass.h"
#include "paddle/fluid/inference/analysis/passes/ir_graph_build_pass.h"
#include "paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.h"
@@ -38,6 +39,8 @@ PassRegistry::PassRegistry() {
      std::unique_ptr<AnalysisPass>(new IrParamsSyncAmongDevicesPass));
  passes_.emplace("adjust_cudnn_workspace_size_pass",
                  std::unique_ptr<AnalysisPass>(new AdjustCudnnWorkSpacePass));
+ passes_.emplace("inference_op_replace_pass",
+                 std::unique_ptr<AnalysisPass>(new InferenceOpReplacePass));
  passes_.emplace(
      "ir_graph_to_program_pass",
      std::unique_ptr<IrGraphToProgramPass>(new IrGraphToProgramPass));
......
@@ -90,6 +90,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
  CP_MEMBER(model_from_memory_);  // the memory model reuses prog_file_ and
                                  // params_file_ fields.
+ CP_MEMBER(opt_cache_dir_);
  prog_file_ = std::move(other.prog_file_);
  params_file_ = std::move(other.params_file_);
@@ -406,11 +407,6 @@ void AnalysisConfig::SetModelBuffer(const char *prog_buffer,
  Update();
}

-void AnalysisConfig::SetEngineOptInfo(
-    std::map<std::string, std::string> engine_opt_info) {
-  engine_opt_info_ = engine_opt_info;
-}

NativeConfig AnalysisConfig::ToNativeConfig() const {
  NativeConfig config;
  config.model_dir = model_dir_;
......
@@ -360,10 +360,10 @@ void AnalysisPredictor::PrepareArgument() {
  argument_.SetStaticMemoryOptimForceUpdate(
      config_.static_memory_optim_force_update_);
  argument_.SetModelFromMemory(config_.model_from_memory_);
- argument_.SetEngineOptInfo(config_.engine_opt_info_);
  // Analyze inference_program
  argument_.SetUseAnakin(config_.anakin_engine_enabled());
  argument_.SetPredictorID(predictor_id_);
+ argument_.SetOptimCacheDir(config_.opt_cache_dir_);
  if (!config_.model_dir().empty()) {
    argument_.SetModelDir(config_.model_dir());
  } else {
......
@@ -61,6 +61,11 @@ struct AnalysisConfig {
  /** Set parameter composed file path.
   */
  void SetParamsFile(const std::string& x) { params_file_ = x; }
+ /** Set opt cache dir.
+  */
+ void SetOptimCacheDir(const std::string& opt_cache_dir) {
+   opt_cache_dir_ = opt_cache_dir;
+ }
  /** Get the model directory path.
   */
  const std::string& model_dir() const { return model_dir_; }
@@ -143,7 +148,7 @@ struct AnalysisConfig {
                              int max_batch_size = 1, int min_subgraph_size = 3,
                              Precision precision = Precision::kFloat32,
                              bool use_static = false,
-                             bool use_calib_mode = false);
+                             bool use_calib_mode = true);
  /** A boolean state telling whether the TensorRT engine is used.
   */
  bool tensorrt_engine_enabled() const { return use_tensorrt_; }
@@ -223,7 +228,6 @@ struct AnalysisConfig {
  /** A boolean state telling whether the model is set from the CPU memory.
   */
  bool model_from_memory() const { return model_from_memory_; }
- void SetEngineOptInfo(std::map<std::string, std::string> engine_opt_info);

  /** Turn on memory optimize
   * NOTE still in development, will release latter.
@@ -311,15 +315,15 @@ struct AnalysisConfig {
  bool anakin_auto_config_layout_{false};
  std::vector<std::string> anakin_passes_filter_;
  std::vector<std::string> anakin_ops_filter_;
- std::map<std::string, std::string> engine_opt_info_;

  bool use_mkldnn_quantizer_{false};
  std::shared_ptr<MkldnnQuantizerConfig> mkldnn_quantizer_config_;

  // If the config is already used on a predictor, it becomes invalid.
- mutable bool is_valid_{true};
  // Any config can only be used with one predictor.
  // Variables held by config can take up a lot of memory in some cases.
  // So we release the memory when the predictor is set up.
+ mutable bool is_valid_{true};
+ std::string opt_cache_dir_;
};

}  // namespace paddle
@@ -73,8 +73,8 @@ class PaddlePassBuilder {
 protected:
  std::vector<std::string> analysis_passes_{
      {"ir_graph_build_pass", "ir_analysis_pass",
-      "ir_params_sync_among_devices_pass",
-      "adjust_cudnn_workspace_size_pass"}};
+      "ir_params_sync_among_devices_pass", "adjust_cudnn_workspace_size_pass",
+      "inference_op_replace_pass"}};
  std::vector<std::string> passes_;
};
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/controlflow/conditional_block_op.h"
namespace paddle {
namespace operators {
/* In the future we will implement each op that carries a sub-block separately.
 * The main reason is that some training-oriented requirements in these ops
 * can lead to problems (such as memory leaks) during inference.
 */
class ConditionalBlockInferOp : public ConditionalOp {
public:
ConditionalBlockInferOp(const std::string &type,
const framework::VariableNameMap &inputs,
const framework::VariableNameMap &outputs,
const framework::AttributeMap &attrs)
: ConditionalOp(type, inputs, outputs, attrs) {}
private:
void RunImpl(const framework::Scope &scope,
const platform::Place &dev_place) const override {
bool need_run;
if (Attr<bool>("is_scalar_condition")) {
// When is_scalar_condition is True, the conditional variable is a scalar;
// whether the operators in the sub-block are executed depends on the
// conditional variable (Cond).
auto xs = InputTensors(scope, "Cond");
need_run = ScalarCondition(xs);
} else {
// When is_scalar_condition is False, the conditional variable may be a
// vector or tensor; whether the operators in the sub-block are executed
// depends on the input variables (Input).
auto xs = InputTensors(scope, "Input");
need_run = std::all_of(
xs.begin(), xs.end(),
[](const framework::LoDTensor *t) { return t->numel() != 0; });
}
if (need_run) {
auto *scope_var = scope.FindVar(Output("Scope"));
PADDLE_ENFORCE(scope_var != nullptr, "Must set scope");
auto *scopes = scope_var->GetMutable<std::vector<framework::Scope *>>();
scopes->resize(1);
scopes->front() = &scope.NewScope();
auto &cur_scope = *scopes->front();
framework::Executor exec(dev_place);
auto *block = Attr<framework::BlockDesc *>("sub_block");
exec.Run(*block->Program(), &cur_scope, block->ID(), false);
scope.DeleteScope(scopes->front());
}
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(conditional_block_infer, ops::ConditionalBlockInferOp,
ops::ConditionalBlockOpProtoMaker,
paddle::framework::EmptyGradOpMaker);
@@ -11,67 +11,12 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

-#include <algorithm>
-#include <memory>
-#include <string>
-#include <vector>
-#include "paddle/fluid/framework/executor.h"
-#include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/framework/var_type.h"
+#include "paddle/fluid/operators/controlflow/conditional_block_op.h"

namespace paddle {
namespace operators {
-class ConditionalOp : public framework::OperatorBase {
- public:
-  ConditionalOp(const std::string &type,
-                const framework::VariableNameMap &inputs,
-                const framework::VariableNameMap &outputs,
-                const framework::AttributeMap &attrs)
-      : OperatorBase(type, inputs, outputs, attrs) {}
-
- protected:
-  std::vector<const framework::LoDTensor *> InputTensors(
-      const framework::Scope &scope, const std::string &in_name) const {
-    std::vector<const framework::LoDTensor *> retv;
-    auto xs = Inputs(in_name);
-    retv.resize(xs.size(), nullptr);
-    std::transform(
-        xs.begin(), xs.end(), retv.begin(),
-        [&scope](const std::string &var_name) -> const framework::LoDTensor * {
-          auto *var = scope.FindVar(var_name);
-          PADDLE_ENFORCE(var != nullptr, "Cannot find variable %s", var_name);
-          return &var->Get<framework::LoDTensor>();
-        });
-    return retv;
-  }
-
-  bool ScalarCondition(
-      const std::vector<const framework::LoDTensor *> &ips) const {
-    if (!(ips.size() == 1UL && ips[0]->IsInitialized())) {
-      PADDLE_THROW("should have one initialized input as condition");
-    }
-    PADDLE_ENFORCE(ips[0]->type() == framework::proto::VarType::BOOL &&
-                       ips[0]->numel() == 1,
-                   "condition input's data type should be bool, "
-                   "numel should be 1, actual numel is %d",
-                   ips[0]->numel());
-    bool res = false;
-    if (platform::is_gpu_place(ips[0]->place())) {
-#ifdef PADDLE_WITH_CUDA
-      framework::LoDTensor cpu_tensor;
-      framework::TensorCopy(*ips[0], platform::CPUPlace(), &cpu_tensor);
-      platform::DeviceContextPool::Instance().Get(ips[0]->place())->Wait();
-      res = cpu_tensor.data<bool>()[0];
-#endif
-    } else {
-      res = ips[0]->data<bool>()[0];
-    }
-    return res;
-  }
-};
class ConditionalBlockOp : public ConditionalOp {
 public:
  ConditionalBlockOp(const std::string &type,
@@ -115,38 +60,6 @@ class ConditionalBlockOp : public ConditionalOp {
  }
};
-class ConditionalBlockOpProtoMaker : public framework::OpProtoAndCheckerMaker {
- public:
-  void Make() override {
-    AddInput("Cond",
-             "The conditional variable of this operator. If Cond is empty, the "
-             "whole sub-block will not be executed.")
-        .AsDuplicable();
-    AddInput("Input", "The input variables of the sub-block.").AsDuplicable();
-    AddOutput("Out", "The output variables of the sub-block.").AsDuplicable();
-    AddOutput("Scope",
-              "(std::vector<Scope*>) The step scope of conditional block. To "
-              "unify the conditional block, rnn and while op, the type of "
-              "scope is std::vector<Scope*>");
-    AddAttr<framework::BlockDesc *>(
-        "sub_block", "The step block of conditional block operator");
-    AddAttr<bool>("is_scalar_condition",
-                  "The conditional variable (Cond) is used as scalar "
-                  "condition.")
-        .SetDefault(false);
-    AddComment(R"DOC(Conditional block operator
-
-If `is_scalar_condition` is True, the conditional variable (Cond) is a scalar,
-run the operators in sub-block if Cond is True.
-
-If `is_scalar_condition` is False, the conditional variable (Cond) is a vector or
-tensor, run the operators in sub-block if all of input variables are not empty.
-)DOC");
-  }
-};
class ConditionalBlockGradOp : public ConditionalOp {
 public:
  ConditionalBlockGradOp(const std::string &type,
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <algorithm>
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/var_type.h"
namespace paddle {
namespace operators {
class ConditionalOp : public framework::OperatorBase {
public:
ConditionalOp(const std::string &type,
const framework::VariableNameMap &inputs,
const framework::VariableNameMap &outputs,
const framework::AttributeMap &attrs)
: OperatorBase(type, inputs, outputs, attrs) {}
protected:
std::vector<const framework::LoDTensor *> InputTensors(
const framework::Scope &scope, const std::string &in_name) const {
std::vector<const framework::LoDTensor *> retv;
auto xs = Inputs(in_name);
retv.resize(xs.size(), nullptr);
std::transform(
xs.begin(), xs.end(), retv.begin(),
[&scope](const std::string &var_name) -> const framework::LoDTensor * {
auto *var = scope.FindVar(var_name);
PADDLE_ENFORCE(var != nullptr, "Cannot find variable %s", var_name);
return &var->Get<framework::LoDTensor>();
});
return retv;
}
bool ScalarCondition(
const std::vector<const framework::LoDTensor *> &ips) const {
if (!(ips.size() == 1UL && ips[0]->IsInitialized())) {
PADDLE_THROW("should have one initialized input as condition");
}
PADDLE_ENFORCE(ips[0]->type() == framework::proto::VarType::BOOL &&
ips[0]->numel() == 1,
"condition input's data type should be bool, "
"numel should be 1, actual numel is %d",
ips[0]->numel());
bool res = false;
if (platform::is_gpu_place(ips[0]->place())) {
#ifdef PADDLE_WITH_CUDA
framework::LoDTensor cpu_tensor;
framework::TensorCopy(*ips[0], platform::CPUPlace(), &cpu_tensor);
platform::DeviceContextPool::Instance().Get(ips[0]->place())->Wait();
res = cpu_tensor.data<bool>()[0];
#endif
} else {
res = ips[0]->data<bool>()[0];
}
return res;
}
};
class ConditionalBlockOpProtoMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("Cond",
"The conditional variable of this operator. If Cond is empty, the "
"whole sub-block will not be executed.")
.AsDuplicable();
AddInput("Input", "The input variables of the sub-block.").AsDuplicable();
AddOutput("Out", "The output variables of the sub-block.").AsDuplicable();
AddOutput("Scope",
"(std::vector<Scope*>) The step scope of conditional block. To "
"unify the conditional block, rnn and while op, the type of "
"scope is std::vector<Scope*>");
AddAttr<framework::BlockDesc *>(
"sub_block", "The step block of conditional block operator");
AddAttr<bool>("is_scalar_condition",
"The conditional variable (Cond) is used as scalar "
"condition.")
.SetDefault(false);
AddComment(R"DOC(Conditional block operator
If `is_scalar_condition` is True, the conditional variable (Cond) is a scalar,
run the operators in sub-block if Cond is True.
If `is_scalar_condition` is False, the conditional variable (Cond) is a vector or
tensor, run the operators in sub-block if all of input variables are not empty.
)DOC");
}
};
} // namespace operators
} // namespace paddle
@@ -121,8 +121,9 @@ class TensorRTEngineOp : public framework::OperatorBase {
    // This process will builds a 32-bit trt engine, runs it on the calibration
    // set, and records a histogram for each
    // tensor of the distribution of activation values.
-   LOG_FIRST_N(INFO, 1) << "The TRT engine: " << engine_key_
-                        << " is running calibration trt int8... ";
+   LOG_FIRST_N(INFO, 1) << "This process is generating calibration table for "
+                           "Paddle TRT int8...";
    int runtime_batch = 1;
    if (!Singleton<TRTCalibratorEngineManager>::Global().Has(engine_key_)) {
      TRTCalibratorEngine *calib_res =
......
@@ -237,7 +237,7 @@ void BindAnalysisConfig(py::module *m) {
           py::arg("workspace_size") = 1 << 20, py::arg("max_batch_size") = 1,
           py::arg("min_subgraph_size") = 3,
           py::arg("precision_mode") = AnalysisConfig::Precision::kFloat32,
-          py::arg("use_static") = true, py::arg("use_calib_mode") = false)
+          py::arg("use_static") = false, py::arg("use_calib_mode") = true)
      .def("enable_anakin_engine", &AnalysisConfig::EnableAnakinEngine,
           py::arg("max_batch_size") = 1,
           py::arg("max_input_shape") =
......