Unverified commit dccdc719, authored by Wangzheee, committed by GitHub

[Paddle-Inference] add Paddle Trt config: with_interleaved (#38884)

* add Paddle Trt config: with_interleaved
Parent 7f123456
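For context, a minimal usage sketch of the new switch. `trt_with_interleaved_` is private and gets no public `Enable...` setter in this change, so the only way in is the experimental `InternalUtils` hook added below; the model path and TRT settings here are illustrative:

```cpp
#include "paddle_inference_api.h"

int main() {
  paddle_infer::Config config("./model_dir");  // placeholder model path
  config.EnableUseGpu(100 /*MB*/, 0 /*gpu_id*/);
  // Interleaved layout targets the int8 + OSS (varseqlen) path.
  config.EnableTensorRtEngine(1 << 30, 1, 3,
                              paddle_infer::PrecisionType::kInt8,
                              false, false);
  config.EnableTensorRtOSS();
  // New in this commit: flip the private trt_with_interleaved_ flag.
  paddle_infer::experimental::InternalUtils::UpdateConfigInterleaved(
      &config, true);
  auto predictor = paddle_infer::CreatePredictor(config);
  return predictor != nullptr ? 0 : 1;
}
```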
@@ -212,6 +212,7 @@ struct Argument {
bool);
DECL_ARGUMENT_FIELD(tensorrt_use_calib_mode, TensorRtUseCalibMode, bool);
DECL_ARGUMENT_FIELD(tensorrt_use_oss, TensorRtUseOSS, bool);
DECL_ARGUMENT_FIELD(tensorrt_with_interleaved, TensorRtWithInterleaved, bool);
DECL_ARGUMENT_FIELD(tensorrt_shape_range_info_path,
TensorRtShapeRangeInfoPath, std::string);
DECL_ARGUMENT_FIELD(tensorrt_tuned_dynamic_shape, TensorRtTunedDynamicShape,
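For readers unfamiliar with the macro, a rough sketch of what `DECL_ARGUMENT_FIELD(tensorrt_with_interleaved, TensorRtWithInterleaved, bool)` expands to, inferred from the accessor and setter used elsewhere in this diff; the real macro body may differ (it also tracks field validity):

```cpp
// Approximate expansion (sketch only, not the actual macro):
 private:
  bool tensorrt_with_interleaved_;

 public:
  bool tensorrt_with_interleaved() { return tensorrt_with_interleaved_; }
  void SetTensorRtWithInterleaved(const bool& x) {
    tensorrt_with_interleaved_ = x;
    // assumed bookkeeping so Has("tensorrt_with_interleaved") works
  }
```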
@@ -108,6 +108,8 @@ void IRPassManager::CreatePasses(Argument *argument,
pass->Set("enable_int8", new bool(enable_int8));
pass->Set("use_calib_mode", new bool(use_calib_mode));
pass->Set("use_oss", new bool(argument->tensorrt_use_oss()));
pass->Set("with_interleaved",
new bool(argument->tensorrt_with_interleaved()));
pass->Set("precision_mode",
new AnalysisConfig::Precision(precision_mode));
@@ -369,6 +369,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
Get<int>("gpu_device_id"), min_input_shape, max_input_shape,
opt_input_shape, disable_trt_plugin_fp16);
trt_engine->SetUseOSS(Get<bool>("use_oss"));
trt_engine->SetWithInterleaved(Get<bool>("with_interleaved"));
trt_engine->SetUseDLA(Get<bool>("trt_use_dla"));
trt_engine->SetDLACore(Get<int>("trt_dla_core"));
@@ -189,6 +189,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
CP_MEMBER(trt_use_static_engine_);
CP_MEMBER(trt_use_calib_mode_);
CP_MEMBER(trt_use_oss_);
CP_MEMBER(trt_with_interleaved_);
CP_MEMBER(trt_tuned_dynamic_shape_);
CP_MEMBER(trt_allow_build_at_runtime_);
CP_MEMBER(collect_shape_range_info_);
@@ -864,6 +865,8 @@ std::string AnalysisConfig::Summary() {
: "false"});
os.InsertRow({"tensorrt_use_oss", trt_use_oss_ ? "true" : "false"});
os.InsertRow({"tensorrt_with_interleaved",
trt_with_interleaved_ ? "true" : "false"});
os.InsertRow({"tensorrt_use_dla", trt_use_dla_ ? "true" : "false"});
if (trt_use_dla_) {
os.InsertRow({"tensorrt_dla_core", std::to_string(trt_dla_core_)});
@@ -605,6 +605,7 @@ void AnalysisPredictor::PrepareArgument() {
argument_.SetTensorRtUseStaticEngine(config_.trt_use_static_engine_);
argument_.SetTensorRtUseCalibMode(config_.trt_use_calib_mode_);
argument_.SetTensorRtUseOSS(config_.trt_use_oss_);
argument_.SetTensorRtWithInterleaved(config_.trt_with_interleaved_);
argument_.SetMinInputShape(config_.min_input_shape_);
argument_.SetMaxInputShape(config_.max_input_shape_);
argument_.SetOptimInputShape(config_.optim_input_shape_);
@@ -1603,5 +1604,11 @@ bool InternalUtils::RunWithExternalStream(paddle_infer::Predictor *p,
#endif
return false;
}
void InternalUtils::UpdateConfigInterleaved(paddle_infer::Config *c,
bool with_interleaved) {
#ifdef PADDLE_WITH_CUDA
c->trt_with_interleaved_ = with_interleaved;
#endif
}
} // namespace experimental
} // namespace paddle_infer
@@ -796,6 +796,7 @@ struct PD_INFER_DECL AnalysisConfig {
bool trt_use_static_engine_{false};
bool trt_use_calib_mode_{true};
bool trt_use_oss_{false};
bool trt_with_interleaved_{false};
bool trt_use_dla_{false};
int trt_dla_core_{0};
std::map<std::string, std::vector<int>> min_input_shape_{};
@@ -883,6 +884,7 @@ struct PD_INFER_DECL AnalysisConfig {
// So we release the memory when the predictor is set up.
mutable bool is_valid_{true};
std::string opt_cache_dir_;
friend class paddle_infer::experimental::InternalUtils;
};
} // namespace paddle
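The `friend` declaration above is what lets `InternalUtils::UpdateConfigInterleaved` write the private `trt_with_interleaved_` member without widening the public `AnalysisConfig` surface; the pattern in miniature (names generic, not Paddle's):

```cpp
// Generic sketch of the friend-gated experimental-flag pattern.
class Config {
 private:
  bool experimental_flag_{false};  // not part of the public API
  friend class InternalUtils;      // the only sanctioned way in
};

class InternalUtils {
 public:
  static void SetFlag(Config* c, bool v) { c->experimental_flag_ = v; }
};
```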
@@ -405,3 +405,24 @@ PD_INFER_DECL std::shared_ptr<framework::Cipher> MakeCipher(
const std::string& config_file);
} // namespace paddle
// forward declaration
using cudaStream_t = struct CUstream_st*;
using hipStream_t = struct ihipStream_t*;
namespace paddle_infer {
class Predictor;
using Config = paddle::AnalysisConfig;
namespace experimental {
class PD_INFER_DECL InternalUtils {
public:
// Note: Can only be used under thread_local semantics.
static bool RunWithExternalStream(paddle_infer::Predictor* pred,
cudaStream_t stream);
static bool RunWithExternalStream(paddle_infer::Predictor* pred,
hipStream_t stream);
static void UpdateConfigInterleaved(paddle_infer::Config* c,
bool with_interleaved);
};
} // namespace experimental
} // namespace paddle_infer
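A hedged usage sketch for the `RunWithExternalStream` entry point declared alongside the new hook, assuming inputs were already copied into the predictor; per the in-code note it must stay on the thread that owns the stream:

```cpp
#include <cuda_runtime_api.h>
#include "paddle_inference_api.h"

// Sketch: run one inference on a caller-owned CUDA stream.
bool RunOnMyStream(paddle_infer::Predictor* predictor) {
  cudaStream_t stream = nullptr;
  cudaStreamCreate(&stream);
  bool ok = paddle_infer::experimental::InternalUtils::RunWithExternalStream(
      predictor, stream);
  cudaStreamSynchronize(stream);  // outputs are ready only after sync
  cudaStreamDestroy(stream);
  return ok;
}
```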
@@ -41,27 +41,11 @@ limitations under the License. */
/// \since 2.0.0-beta
///
// forward declaration
using cudaStream_t = struct CUstream_st*;
using hipStream_t = struct ihipStream_t*;
namespace paddle_infer {
using PrecisionType = paddle::AnalysisConfig::Precision;
using Config = paddle::AnalysisConfig;
class Predictor;
namespace experimental {
class PD_INFER_DECL InternalUtils {
public:
// Note: Can only be used under thread_local semantics.
static bool RunWithExternalStream(paddle_infer::Predictor* pred,
cudaStream_t stream);
static bool RunWithExternalStream(paddle_infer::Predictor* pred,
hipStream_t stream);
};
} // namespace experimental
///
/// \class Predictor
///
@@ -45,7 +45,7 @@ class BatchNormOpConverter : public OpConverter {
auto* Scale_v = scope.FindVar(op_desc.Input("Scale").front());
auto* Variance_v = scope.FindVar(op_desc.Input("Variance").front());
const float eps = BOOST_GET_CONST(float, op_desc.GetAttr("epsilon"));
auto output_name = op_desc.Output("Y").front();
PADDLE_ENFORCE_NOT_NULL(
Bias_v,
platform::errors::NotFound(
@@ -145,6 +145,10 @@ class BatchNormOpConverter : public OpConverter {
expand_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *X);
expand_layer->setReshapeDimensions(expand_shape);
X = expand_layer->getOutput(0);
expand_layer->getOutput(0)->setName(
("reshape_before_batchnorm_out: " + output_name).c_str());
expand_layer->setName(
("BN_Shuffle: (Output: " + output_name + ")").c_str());
}
layer = TRT_ENGINE_ADD_LAYER(engine_, ScaleNd, *X,
@@ -152,12 +156,13 @@
shift_weights.get(), scale_weights.get(),
power_weights.get(), dynamic_shape_offset);
auto output_name = op_desc.Output("Y").front();
engine_->SetWeights(op_desc.Input("Bias").front(),
std::move(combile_bias_tensor));
engine_->SetWeights(op_desc.Input("Scale").front(),
std::move(combile_scale_tensor));
if (x_dim.nbDims < 3 + dynamic_shape_offset) {
layer->getOutput(0)->setName("batch_norm_out");
layer->setName(("BN: ScaleNd: (Output: " + output_name + ")").c_str());
nvinfer1::Dims squeeze_shape;
squeeze_shape.nbDims = x_dim.nbDims;
for (int i = 0; i < squeeze_shape.nbDims; i++) {
@@ -166,10 +171,12 @@
squeeze_layer =
TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *(layer->getOutput(0)));
squeeze_layer->setReshapeDimensions(squeeze_shape);
layer = static_cast<nvinfer1::ILayer*>(squeeze_layer);
RreplenishLayerAndOutput(squeeze_layer, "batchnorm_add_scale",
{output_name}, test_mode);
} else {
RreplenishLayerAndOutput(layer, "batchnorm_add_scale", {output_name},
test_mode);
}
RreplenishLayerAndOutput(layer, "batchnorm_add_scale", {output_name},
test_mode);
}
};
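The Shuffle layers being named above are the expand/squeeze bracket the converter puts around `ScaleNd` when the input rank is below `3 + dynamic_shape_offset`; a sketch of the dimension round trip (helper name hypothetical, shapes illustrative):

```cpp
#include <NvInfer.h>

// Hypothetical helper mirroring the converter's expand step: pad a low-rank
// shape with trailing 1s, e.g. [N, C] -> [N, C, 1, 1]; the squeeze step after
// ScaleNd restores the original rank.
nvinfer1::Dims MakeExpandShape(const nvinfer1::Dims& x_dim, int target_rank) {
  nvinfer1::Dims expand_shape;
  expand_shape.nbDims = target_rank;
  for (int i = 0; i < target_rank; ++i) {
    expand_shape.d[i] = i < x_dim.nbDims ? x_dim.d[i] : 1;
  }
  return expand_shape;
}
```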
@@ -50,6 +50,7 @@ class ElementwiseWeightOpConverter : public OpConverter {
op_desc.Input("Y").front().c_str()));
auto* Y_t = Y_v->GetMutable<framework::LoDTensor>();
float* weight_data = nullptr;
auto output_name = op_desc.Output("Out")[0];
weight_data =
engine_->GetWeightCPUData(op_desc.Input("Y").front(), Y_t, false);
nvinfer1::Dims dims_x = X->getDimensions();
@@ -80,6 +81,10 @@
expand_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *X);
expand_layer->setReshapeDimensions(expand_shape);
X = expand_layer->getOutput(0);
expand_layer->getOutput(0)->setName(
("elementwise_reshape_out: " + output_name).c_str());
expand_layer->setName(
("Elewise: Shuffle: (Output: " + output_name + ")").c_str());
}
if (op_type_ == "add") {
nvinfer1::IScaleLayer* scale_layer = TRT_ENGINE_ADD_LAYER(
@@ -101,11 +106,12 @@
squeeze_layer =
TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *(layer->getOutput(0)));
squeeze_layer->setReshapeDimensions(squeeze_shape);
layer = static_cast<nvinfer1::ILayer*>(squeeze_layer);
RreplenishLayerAndOutput(squeeze_layer, "elementwise_" + op_type_,
{output_name}, test_mode);
} else {
RreplenishLayerAndOutput(layer, "elementwise_" + op_type_,
{output_name}, test_mode);
}
auto output_name = op_desc.Output("Out")[0];
RreplenishLayerAndOutput(layer, "elementwise_" + op_type_, {output_name},
test_mode);
if (op_desc.HasAttr("enable_int8")) {
#if IS_TRT_VERSION_GE(5000)
CHECK(op_desc.HasAttr("X_scale"));
@@ -56,6 +56,8 @@ class GatherOpConverter : public OpConverter {
index_shape.d[0] = -1;
reshape_layer->setReshapeDimensions(index_shape);
reshape_layer->setName(
("Gather: Shuffle: (Output: " + output_name + ")").c_str());
auto layer = TRT_ENGINE_ADD_LAYER(engine_, Gather, *input_tensor,
*reshape_layer->getOutput(0), axis);
@@ -144,28 +144,44 @@ class OpConverter {
it->SetEngine(engine);
(*it)(op, scope, test_mode);
bool has_out_scale = op_desc.HasAttr("out_threshold");
if (has_out_scale) {
float out_scale =
BOOST_GET_CONST(float, op_desc.GetAttr("out_threshold"));
std::string output_name = "";
if (op_desc.HasOutput("Output")) {
output_name = op_desc.Output("Output").front();
} else if (op_desc.HasOutput("Out")) {
output_name = op_desc.Output("Out").front();
} else if (op_desc.HasOutput("Y")) {
output_name = op_desc.Output("Y").front();
} else {
PADDLE_THROW(
platform::errors::NotFound("Op %s has out threshold but doesn't "
"have an output named \"Output\", "
"\"Out\" or \"Y\".",
op_desc.Type()));
size_t output_num = op_desc.OutputNames().size();
if (output_num == 1) {  // the op has a single output
if (op_desc.HasAttr("out_threshold")) {
float out_scale =
BOOST_GET_CONST(float, op_desc.GetAttr("out_threshold"));
std::string output_name = "";
if (op_desc.HasOutput("Output")) {
output_name = op_desc.Output("Output").front();
} else if (op_desc.HasOutput("Out")) {
output_name = op_desc.Output("Out").front();
} else if (op_desc.HasOutput("Y")) {
output_name = op_desc.Output("Y").front();
} else {
PADDLE_THROW(
platform::errors::NotFound("Op %s has out threshold but doesn't "
"have an output named \"Output\", "
"\"Out\" or \"Y\".",
op_desc.Type()));
}
auto* output_itensor = engine->GetITensor(output_name);
engine->SetTensorDynamicRange(output_itensor, out_scale);
VLOG(1) << "Set out scale = " << out_scale << " for tensor "
<< output_name << ".";
}
} else if (output_num > 1) {  // the op has multiple outputs
for (size_t i = 0; i < output_num; ++i) {
if (op_desc.HasAttr("out_" + std::to_string(i) + "_threshold")) {
float out_scale = BOOST_GET_CONST(
float,
op_desc.GetAttr("out_" + std::to_string(i) + "_threshold"));
std::string output_name =
op_desc.Output(op_desc.OutputNames()[i]).front();
auto* output_itensor = engine->GetITensor(output_name);
engine->SetTensorDynamicRange(output_itensor, out_scale);
VLOG(1) << "Set out scale = " << out_scale << " for tensor "
<< output_name << ".";
}
}
auto* output_itensor = engine->GetITensor(output_name);
engine->SetTensorDynamicRange(output_itensor, out_scale);
VLOG(1) << "Set out scale = " << out_scale << " for tensor "
<< output_name << ".";
}
}
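The rewrite generalizes int8 output scales from single-output to multi-output ops: one output keeps the old `out_threshold` attribute, several outputs use per-index `out_<i>_threshold` names matched against `OutputNames()`. A hypothetical helper spelling out that naming contract:

```cpp
#include <string>
#include <vector>

// Hypothetical sketch of the attribute names the converter now looks for.
std::vector<std::string> OutScaleAttrNames(size_t output_num) {
  std::vector<std::string> names;
  if (output_num == 1) {
    names.push_back("out_threshold");
  } else {
    for (size_t i = 0; i < output_num; ++i) {
      names.push_back("out_" + std::to_string(i) + "_threshold");
    }
  }
  return names;
}
```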
@@ -89,21 +89,34 @@ class ScaleOpConverter : public OpConverter {
expand_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input);
expand_layer->setReshapeDimensions(expand_shape);
input = expand_layer->getOutput(0);
expand_layer->getOutput(0)->setName(
("before_reshape_out: " + out_name).c_str());
expand_layer->setName(
("Scale: before_reshape (Output: " + out_name + ")").c_str());
}
if (bias_after_scale) {
layer = TRT_ENGINE_ADD_LAYER(
engine_, Scale, *input, nvinfer1::ScaleMode::kUNIFORM,
shift_weights.get(), scale_weights.get(), power_weights.get());
layer->getOutput(0)->setName(
("bias_after_scale_out: " + out_name).c_str());
layer->setName(("Scale: scale (Output: " + out_name + ")").c_str());
} else {
// add bias
layer = TRT_ENGINE_ADD_LAYER(
engine_, Scale, *(input), nvinfer1::ScaleMode::kUNIFORM,
shift_weights.get(), power_weights.get(), power_weights.get());
layer->getOutput(0)->setName(
("bias_before_scale:bias_out: " + out_name).c_str());
layer->setName(("Scale: scale_bias (Output: " + out_name + ")").c_str());
// mul scale
layer = TRT_ENGINE_ADD_LAYER(
engine_, Scale, *(layer->getOutput(0)), nvinfer1::ScaleMode::kUNIFORM,
power_weights.get(), scale_weights.get(), power_weights.get());
layer->getOutput(0)->setName(
("bias_before_scale:scale_out: " + out_name).c_str());
layer->setName(("Scale: scale_scale (Output: " + out_name + ")").c_str());
}
PADDLE_ENFORCE_EQ(layer != nullptr, true,
@@ -119,6 +132,9 @@
TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *(layer->getOutput(0)));
squeeze_layer->setReshapeDimensions(squeeze_shape);
layer = static_cast<nvinfer1::ILayer*>(squeeze_layer);
layer->getOutput(0)->setName(("after_reshape_out: " + out_name).c_str());
layer->setName(
("Scale: Shuffle_reshape (Output: " + out_name + ")").c_str());
}
RreplenishLayerAndOutput(layer, "scale", {out_name}, test_mode);
}
@@ -30,10 +30,11 @@ class SliceOpConverter : public OpConverter {
framework::OpDesc op_desc(op, nullptr);
// Declare inputs
auto* input = engine_->GetITensor(op_desc.Input("Input")[0]);
auto output_name = op_desc.Output("Out")[0];
float out_scale = 1;
if (op_desc.HasAttr("out_threshold")) {
float out_scale =
BOOST_GET_CONST(float, op_desc.GetAttr("out_threshold"));
out_scale = BOOST_GET_CONST(float, op_desc.GetAttr("out_threshold"));
engine_->SetTensorDynamicRange(input, out_scale);
}
@@ -71,12 +72,22 @@
nvinfer1::ILayer* layer = nullptr;
if (engine_->with_dynamic_shape()) {
#if IS_TRT_VERSION_GE(6000)
if (engine_->use_oss() && engine_->with_ernie()) {
std::vector<nvinfer1::ITensor*> plugin_inputs;
// plugin_inputs.emplace_back(trans_layer->getOutput(0));
plugin_inputs.emplace_back(input);
if (engine_->with_interleaved()) {
auto* shuffler_slice = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input);
nvinfer1::Permutation transpose_embed{2, 1, 0, 3};
shuffler_slice->setSecondTranspose(transpose_embed);
engine_->SetTensorDynamicRange(shuffler_slice->getOutput(0),
out_scale);
shuffler_slice->setName(
("SpecialSlice_interleaved: Shuffle: (Output: " + output_name +
")")
.c_str());
plugin_inputs.emplace_back(shuffler_slice->getOutput(0));
} else {
plugin_inputs.emplace_back(input);
}
std::string pos_name;
if (engine_->Has("ernie_pos_name")) {
pos_name = engine_->Get<std::string>("ernie_pos_name");
@@ -99,11 +110,6 @@
new plugin::SlicePluginDynamic(starts, ends, axes, with_fp16);
layer = engine_->AddDynamicPlugin(&input, 1, plugin);
}
#else
PADDLE_THROW(platform::errors::Fatal(
"You are running the TRT Dynamic Shape mode, need to confirm that "
"your TRT version is no less than 6.0"));
#endif
} else {
bool with_fp16 =
engine_->WithFp16() && !engine_->disable_trt_plugin_fp16();
@@ -111,8 +117,6 @@
new plugin::SlicePlugin(starts, ends, axes, with_fp16);
layer = engine_->AddPlugin(&input, 1, plugin);
}
auto output_name = op_desc.Output("Out")[0];
RreplenishLayerAndOutput(layer, "slice", {output_name}, test_mode);
}
};
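The interleaved branch prepends a Shuffle whose second transpose is `{2, 1, 0, 3}`, i.e. dims 0 and 2 swap while 1 and 3 stay put; our reading (not stated in the diff) is that this matches the layout the OSS varseqlen plugins expect in interleaved int8 mode. A sketch of the permutation's effect:

```cpp
#include <NvInfer.h>

// Sketch: apply an nvinfer1::Permutation to a Dims, e.g. {2, 1, 0, 3} maps a
// (S, B, H, 1) shape to (H, B, S, 1). Dimension meanings are our assumption.
nvinfer1::Dims ApplyPermutation(const nvinfer1::Dims& in,
                                const nvinfer1::Permutation& perm) {
  nvinfer1::Dims out = in;
  for (int i = 0; i < in.nbDims; ++i) {
    out.d[i] = in.d[perm.order[i]];
  }
  return out;
}
```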
@@ -407,6 +407,9 @@ class TensorRTEngine {
void SetUseDLA(bool use_dla) { use_dla_ = use_dla; }
void SetDLACore(int dla_core) { dla_core_ = dla_core; }
void SetWithErnie(bool with_ernie) { with_ernie_ = with_ernie; }
void SetWithInterleaved(bool with_interleaved) {
with_interleaved_ = with_interleaved;
}
void ClearWeights() {
for (auto& weight_pair : weight_map) {
@@ -480,6 +483,7 @@ class TensorRTEngine {
bool use_oss() { return use_oss_; }
bool with_ernie() { return with_ernie_; }
bool with_interleaved() { return with_interleaved_; }
bool disable_trt_plugin_fp16() { return disable_trt_plugin_fp16_; }
bool with_dynamic_shape() { return with_dynamic_shape_; }
AnalysisConfig::Precision precision() { return precision_; }
@@ -612,6 +616,7 @@ class TensorRTEngine {
bool use_dla_{false};
int dla_core_{0};
bool with_ernie_{false};
bool with_interleaved_{false};
nvinfer1::ILogger& logger_;
// max data size for the buffers.