Unverified commit dccdc719, authored by Wangzheee, committed by GitHub

[Paddle-Inference] add Paddle Trt config: with_interleaved (#38884)

* add Paddle Trt config: with_interleaved
Parent 7f123456
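For context, a minimal sketch of how the new option can be switched on from user code. Only UpdateConfigInterleaved below is introduced by this commit; the surrounding predictor setup (model path, precision, workspace size) is illustrative and assumes a GPU build of Paddle Inference:

  #include <memory>
  #include "paddle_inference_api.h"

  int main() {
    paddle_infer::Config config("./model_dir");  // hypothetical model directory
    config.EnableUseGpu(100 /*MB*/, 0 /*gpu id*/);
    // with_interleaved only matters for the TensorRT engine path;
    // int8 here is just an example precision choice.
    config.EnableTensorRtEngine(1 << 30, 1, 3,
                                paddle_infer::PrecisionType::kInt8,
                                false, false);
    // New in this commit: propagate with_interleaved into AnalysisConfig
    // through the experimental utility (declared a friend of AnalysisConfig).
    paddle_infer::experimental::InternalUtils::UpdateConfigInterleaved(&config,
                                                                       true);
    auto predictor = paddle_infer::CreatePredictor(config);
    return 0;
  }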
@@ -212,6 +212,7 @@ struct Argument {
                       bool);
   DECL_ARGUMENT_FIELD(tensorrt_use_calib_mode, TensorRtUseCalibMode, bool);
   DECL_ARGUMENT_FIELD(tensorrt_use_oss, TensorRtUseOSS, bool);
+  DECL_ARGUMENT_FIELD(tensorrt_with_interleaved, TensorRtWithInterleaved, bool);
   DECL_ARGUMENT_FIELD(tensorrt_shape_range_info_path,
                       TensorRtShapeRangeInfoPath, std::string);
   DECL_ARGUMENT_FIELD(tensorrt_tuned_dynamic_shape, TensorRtTunedDynamicShape,
......
@@ -108,6 +108,8 @@ void IRPassManager::CreatePasses(Argument *argument,
       pass->Set("enable_int8", new bool(enable_int8));
       pass->Set("use_calib_mode", new bool(use_calib_mode));
       pass->Set("use_oss", new bool(argument->tensorrt_use_oss()));
+      pass->Set("with_interleaved",
+                new bool(argument->tensorrt_with_interleaved()));
       pass->Set("precision_mode",
                 new AnalysisConfig::Precision(precision_mode));
......
@@ -369,6 +369,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
         Get<int>("gpu_device_id"), min_input_shape, max_input_shape,
         opt_input_shape, disable_trt_plugin_fp16);
     trt_engine->SetUseOSS(Get<bool>("use_oss"));
+    trt_engine->SetWithInterleaved(Get<bool>("with_interleaved"));
     trt_engine->SetUseDLA(Get<bool>("trt_use_dla"));
     trt_engine->SetDLACore(Get<int>("trt_dla_core"));
......
@@ -189,6 +189,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   CP_MEMBER(trt_use_static_engine_);
   CP_MEMBER(trt_use_calib_mode_);
   CP_MEMBER(trt_use_oss_);
+  CP_MEMBER(trt_with_interleaved_);
   CP_MEMBER(trt_tuned_dynamic_shape_);
   CP_MEMBER(trt_allow_build_at_runtime_);
   CP_MEMBER(collect_shape_range_info_);
@@ -864,6 +865,8 @@ std::string AnalysisConfig::Summary() {
                       : "false"});
     os.InsertRow({"tensorrt_use_oss", trt_use_oss_ ? "true" : "false"});
+    os.InsertRow({"tensorrt_with_interleaved",
+                  trt_with_interleaved_ ? "true" : "false"});
     os.InsertRow({"tensorrt_use_dla", trt_use_dla_ ? "true" : "false"});
     if (trt_use_dla_) {
       os.InsertRow({"tensorrt_dla_core", std::to_string(trt_dla_core_)});
......
@@ -605,6 +605,7 @@ void AnalysisPredictor::PrepareArgument() {
     argument_.SetTensorRtUseStaticEngine(config_.trt_use_static_engine_);
     argument_.SetTensorRtUseCalibMode(config_.trt_use_calib_mode_);
     argument_.SetTensorRtUseOSS(config_.trt_use_oss_);
+    argument_.SetTensorRtWithInterleaved(config_.trt_with_interleaved_);
     argument_.SetMinInputShape(config_.min_input_shape_);
     argument_.SetMaxInputShape(config_.max_input_shape_);
     argument_.SetOptimInputShape(config_.optim_input_shape_);
@@ -1603,5 +1604,11 @@ bool InternalUtils::RunWithExternalStream(paddle_infer::Predictor *p,
 #endif
   return false;
 }
+void InternalUtils::UpdateConfigInterleaved(paddle_infer::Config *c,
+                                            bool with_interleaved) {
+#ifdef PADDLE_WITH_CUDA
+  c->trt_with_interleaved_ = with_interleaved;
+#endif
+}
 }  // namespace experimental
 }  // namespace paddle_infer
@@ -796,6 +796,7 @@ struct PD_INFER_DECL AnalysisConfig {
   bool trt_use_static_engine_{false};
   bool trt_use_calib_mode_{true};
   bool trt_use_oss_{false};
+  bool trt_with_interleaved_{false};
   bool trt_use_dla_{false};
   int trt_dla_core_{0};
   std::map<std::string, std::vector<int>> min_input_shape_{};
@@ -883,6 +884,7 @@ struct PD_INFER_DECL AnalysisConfig {
   // So we release the memory when the predictor is set up.
   mutable bool is_valid_{true};
   std::string opt_cache_dir_;
+  friend class paddle_infer::experimental::InternalUtils;
 };
 }  // namespace paddle
@@ -405,3 +405,24 @@ PD_INFER_DECL std::shared_ptr<framework::Cipher> MakeCipher(
     const std::string& config_file);
 }  // namespace paddle
+// forward declation
+using cudaStream_t = struct CUstream_st*;
+using hipStream_t = struct ihipStream_t*;
+
+namespace paddle_infer {
+class Predictor;
+using Config = paddle::AnalysisConfig;
+namespace experimental {
+class PD_INFER_DECL InternalUtils {
+ public:
+  // Note: Can only be used under thread_local semantics.
+  static bool RunWithExternalStream(paddle_infer::Predictor* pred,
+                                    cudaStream_t stream);
+  static bool RunWithExternalStream(paddle_infer::Predictor* pred,
+                                    hipStream_t stream);
+
+  static void UpdateConfigInterleaved(paddle_infer::Config* c,
+                                      bool with_interleaved);
+};
+}  // namespace experimental
+}  // namespace paddle_infer
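As an aside, the relocated experimental class also exposes RunWithExternalStream. A rough usage sketch under assumed conditions (a predictor built from a GPU-enabled Config; input feeding elided), not something this commit changes:

  #include <cuda_runtime.h>
  #include <memory>
  #include "paddle_inference_api.h"

  void RunOnStream(std::shared_ptr<paddle_infer::Predictor> predictor) {
    cudaStream_t stream;
    cudaStreamCreate(&stream);
    // Feed inputs through predictor->GetInputHandle(...) as usual, then
    // launch the run on the caller-provided CUDA stream.
    paddle_infer::experimental::InternalUtils::RunWithExternalStream(
        predictor.get(), stream);
    cudaStreamSynchronize(stream);
    cudaStreamDestroy(stream);
  }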
@@ -41,27 +41,11 @@ limitations under the License. */
 /// \since 2.0.0-beta
 ///
-// forward declation
-using cudaStream_t = struct CUstream_st*;
-using hipStream_t = struct ihipStream_t*;
-
 namespace paddle_infer {
 using PrecisionType = paddle::AnalysisConfig::Precision;
 using Config = paddle::AnalysisConfig;
-class Predictor;
-namespace experimental {
-class PD_INFER_DECL InternalUtils {
- public:
-  // Note: Can only be used under thread_local semantics.
-  static bool RunWithExternalStream(paddle_infer::Predictor* pred,
-                                    cudaStream_t stream);
-  static bool RunWithExternalStream(paddle_infer::Predictor* pred,
-                                    hipStream_t stream);
-};
-}  // namespace experimental
 ///
 /// \class Predictor
 ///
......
@@ -45,7 +45,7 @@ class BatchNormOpConverter : public OpConverter {
     auto* Scale_v = scope.FindVar(op_desc.Input("Scale").front());
     auto* Variance_v = scope.FindVar(op_desc.Input("Variance").front());
     const float eps = BOOST_GET_CONST(float, op_desc.GetAttr("epsilon"));
+    auto output_name = op_desc.Output("Y").front();
     PADDLE_ENFORCE_NOT_NULL(
         Bias_v,
         platform::errors::NotFound(
@@ -145,6 +145,10 @@ class BatchNormOpConverter : public OpConverter {
       expand_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *X);
       expand_layer->setReshapeDimensions(expand_shape);
       X = expand_layer->getOutput(0);
+      expand_layer->getOutput(0)->setName(
+          ("reshape_before_batchnorm_out: " + output_name).c_str());
+      expand_layer->setName(
+          ("BN_Shuffle: (Output: " + output_name + ")").c_str());
     }
     layer = TRT_ENGINE_ADD_LAYER(engine_, ScaleNd, *X,
@@ -152,12 +156,13 @@ class BatchNormOpConverter : public OpConverter {
                              shift_weights.get(), scale_weights.get(),
                              power_weights.get(), dynamic_shape_offset);
-    auto output_name = op_desc.Output("Y").front();
     engine_->SetWeights(op_desc.Input("Bias").front(),
                         std::move(combile_bias_tensor));
     engine_->SetWeights(op_desc.Input("Scale").front(),
                         std::move(combile_scale_tensor));
     if (x_dim.nbDims < 3 + dynamic_shape_offset) {
+      layer->getOutput(0)->setName("batch_norm_out");
+      layer->setName(("BN: ScaleNd: (Output: " + output_name + ")").c_str());
       nvinfer1::Dims squeeze_shape;
       squeeze_shape.nbDims = x_dim.nbDims;
       for (int i = 0; i < squeeze_shape.nbDims; i++) {
@@ -166,11 +171,13 @@ class BatchNormOpConverter : public OpConverter {
       squeeze_layer =
           TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *(layer->getOutput(0)));
       squeeze_layer->setReshapeDimensions(squeeze_shape);
-      layer = static_cast<nvinfer1::ILayer*>(squeeze_layer);
-    }
+      RreplenishLayerAndOutput(squeeze_layer, "batchnorm_add_scale",
+                               {output_name}, test_mode);
+    } else {
       RreplenishLayerAndOutput(layer, "batchnorm_add_scale", {output_name},
                                test_mode);
+    }
   }
 };
 }  // namespace tensorrt
......
@@ -50,6 +50,7 @@ class ElementwiseWeightOpConverter : public OpConverter {
                                           op_desc.Input("Y").front().c_str()));
     auto* Y_t = Y_v->GetMutable<framework::LoDTensor>();
     float* weight_data = nullptr;
+    auto output_name = op_desc.Output("Out")[0];
     weight_data =
         engine_->GetWeightCPUData(op_desc.Input("Y").front(), Y_t, false);
     nvinfer1::Dims dims_x = X->getDimensions();
@@ -80,6 +81,10 @@ class ElementwiseWeightOpConverter : public OpConverter {
       expand_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *X);
       expand_layer->setReshapeDimensions(expand_shape);
       X = expand_layer->getOutput(0);
+      expand_layer->getOutput(0)->setName(
+          ("elementwise_reshape_out: " + output_name).c_str());
+      expand_layer->setName(
+          ("Elewise: Shuffle: (Output: " + output_name + ")").c_str());
     }
     if (op_type_ == "add") {
       nvinfer1::IScaleLayer* scale_layer = TRT_ENGINE_ADD_LAYER(
@@ -101,11 +106,12 @@ class ElementwiseWeightOpConverter : public OpConverter {
       squeeze_layer =
           TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *(layer->getOutput(0)));
       squeeze_layer->setReshapeDimensions(squeeze_shape);
-      layer = static_cast<nvinfer1::ILayer*>(squeeze_layer);
+      RreplenishLayerAndOutput(squeeze_layer, "elementwise_" + op_type_,
+                               {output_name}, test_mode);
+    } else {
+      RreplenishLayerAndOutput(layer, "elementwise_" + op_type_,
+                               {output_name}, test_mode);
     }
-    auto output_name = op_desc.Output("Out")[0];
-    RreplenishLayerAndOutput(layer, "elementwise_" + op_type_, {output_name},
-                             test_mode);
     if (op_desc.HasAttr("enable_int8")) {
 #if IS_TRT_VERSION_GE(5000)
       CHECK(op_desc.HasAttr("X_scale"));
......
@@ -56,6 +56,8 @@ class GatherOpConverter : public OpConverter {
     index_shape.d[0] = -1;
     reshape_layer->setReshapeDimensions(index_shape);
+    reshape_layer->setName(
+        ("Gather: Shuffle: (Output: " + output_name + ")").c_str());
     auto layer = TRT_ENGINE_ADD_LAYER(engine_, Gather, *input_tensor,
                                       *reshape_layer->getOutput(0), axis);
......
@@ -144,8 +144,9 @@ class OpConverter {
     it->SetEngine(engine);
     (*it)(op, scope, test_mode);
-    bool has_out_scale = op_desc.HasAttr("out_threshold");
-    if (has_out_scale) {
+    size_t output_num = op_desc.OutputNames().size();
+    if (output_num == 1) {  // The number of output is 1
+      if (op_desc.HasAttr("out_threshold")) {
         float out_scale =
             BOOST_GET_CONST(float, op_desc.GetAttr("out_threshold"));
         std::string output_name = "";
@@ -167,6 +168,21 @@ class OpConverter {
         VLOG(1) << "Set out scale = " << out_scale << " for tensor "
                 << output_name << ".";
       }
+    } else if (output_num > 1) {  // The number of outputs greater than 1
+      for (size_t i = 0; i < output_num; ++i) {
+        if (op_desc.HasAttr("out_" + std::to_string(i) + "_threshold")) {
+          float out_scale = BOOST_GET_CONST(
+              float,
+              op_desc.GetAttr("out_" + std::to_string(i) + "_threshold"));
+          std::string output_name =
+              op_desc.Output(op_desc.OutputNames()[i]).front();
+          auto* output_itensor = engine->GetITensor(output_name);
+          engine->SetTensorDynamicRange(output_itensor, out_scale);
+          VLOG(1) << "Set out scale = " << out_scale << " for tensor "
+                  << output_name << ".";
+        }
+      }
+    }
   }
 
   // Convert a fluid block to tensorrt network, NOTE it just convert operators,
......
@@ -89,21 +89,34 @@ class ScaleOpConverter : public OpConverter {
       expand_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input);
       expand_layer->setReshapeDimensions(expand_shape);
       input = expand_layer->getOutput(0);
+      expand_layer->getOutput(0)->setName(
+          ("before_reshape_out: " + out_name).c_str());
+      expand_layer->setName(
+          ("Scale: before_reshape (Output: " + out_name + ")").c_str());
     }
     if (bias_after_scale) {
       layer = TRT_ENGINE_ADD_LAYER(
           engine_, Scale, *input, nvinfer1::ScaleMode::kUNIFORM,
           shift_weights.get(), scale_weights.get(), power_weights.get());
+      layer->getOutput(0)->setName(
+          ("bias_after_scale_out: " + out_name).c_str());
+      layer->setName(("Scale: scale (Output: " + out_name + ")").c_str());
     } else {
       // add bias
       layer = TRT_ENGINE_ADD_LAYER(
           engine_, Scale, *(input), nvinfer1::ScaleMode::kUNIFORM,
           shift_weights.get(), power_weights.get(), power_weights.get());
+      layer->getOutput(0)->setName(
+          ("bias_before_scale:bias_out: " + out_name).c_str());
+      layer->setName(("Scale: scale_bias (Output: " + out_name + ")").c_str());
       // mul scale
       layer = TRT_ENGINE_ADD_LAYER(
           engine_, Scale, *(layer->getOutput(0)), nvinfer1::ScaleMode::kUNIFORM,
           power_weights.get(), scale_weights.get(), power_weights.get());
+      layer->getOutput(0)->setName(
+          ("bias_before_scale:scale_out: " + out_name).c_str());
+      layer->setName(("Scale: scale_scale (Output: " + out_name + ")").c_str());
     }
     PADDLE_ENFORCE_EQ(layer != nullptr, true,
@@ -119,6 +132,9 @@ class ScaleOpConverter : public OpConverter {
           TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *(layer->getOutput(0)));
       squeeze_layer->setReshapeDimensions(squeeze_shape);
       layer = static_cast<nvinfer1::ILayer*>(squeeze_layer);
+      layer->getOutput(0)->setName(("after_reshape_out: " + out_name).c_str());
+      layer->setName(
+          ("Scale: Shuffle_reshape (Output: " + out_name + ")").c_str());
     }
     RreplenishLayerAndOutput(layer, "scale", {out_name}, test_mode);
   }
......
@@ -30,10 +30,11 @@ class SliceOpConverter : public OpConverter {
     framework::OpDesc op_desc(op, nullptr);
     // Declare inputs
     auto* input = engine_->GetITensor(op_desc.Input("Input")[0]);
+    auto output_name = op_desc.Output("Out")[0];
+    float out_scale = 1;
     if (op_desc.HasAttr("out_threshold")) {
-      float out_scale =
-          BOOST_GET_CONST(float, op_desc.GetAttr("out_threshold"));
+      out_scale = BOOST_GET_CONST(float, op_desc.GetAttr("out_threshold"));
       engine_->SetTensorDynamicRange(input, out_scale);
     }
@@ -71,12 +72,22 @@ class SliceOpConverter : public OpConverter {
     nvinfer1::ILayer* layer = nullptr;
     if (engine_->with_dynamic_shape()) {
-#if IS_TRT_VERSION_GE(6000)
       if (engine_->use_oss() && engine_->with_ernie()) {
         std::vector<nvinfer1::ITensor*> plugin_inputs;
-        // plugin_inputs.emplace_back(trans_layer->getOutput(0));
+        if (engine_->with_interleaved()) {
+          auto* shuffler_slice = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input);
+          nvinfer1::Permutation transpose_embed{2, 1, 0, 3};
+          shuffler_slice->setSecondTranspose(transpose_embed);
+          engine_->SetTensorDynamicRange(shuffler_slice->getOutput(0),
+                                         out_scale);
+          shuffler_slice->setName(
+              ("SpecialSlice_interleaved: Shuffle: (Output: " + output_name +
+               ")")
+                  .c_str());
+          plugin_inputs.emplace_back(shuffler_slice->getOutput(0));
+        } else {
           plugin_inputs.emplace_back(input);
+        }
         std::string pos_name;
         if (engine_->Has("ernie_pos_name")) {
           pos_name = engine_->Get<std::string>("ernie_pos_name");
@@ -99,11 +110,6 @@ class SliceOpConverter : public OpConverter {
             new plugin::SlicePluginDynamic(starts, ends, axes, with_fp16);
         layer = engine_->AddDynamicPlugin(&input, 1, plugin);
       }
-#else
-      PADDLE_THROW(platform::errors::Fatal(
-          "You are running the TRT Dynamic Shape mode, need to confirm that "
-          "your TRT version is no less than 6.0"));
-#endif
     } else {
       bool with_fp16 =
           engine_->WithFp16() && !engine_->disable_trt_plugin_fp16();
@@ -111,8 +117,6 @@ class SliceOpConverter : public OpConverter {
           new plugin::SlicePlugin(starts, ends, axes, with_fp16);
       layer = engine_->AddPlugin(&input, 1, plugin);
     }
-    auto output_name = op_desc.Output("Out")[0];
     RreplenishLayerAndOutput(layer, "slice", {output_name}, test_mode);
   }
 };
......
@@ -407,6 +407,9 @@ class TensorRTEngine {
   void SetUseDLA(bool use_dla) { use_dla_ = use_dla; }
   void SetDLACore(int dla_core) { dla_core_ = dla_core; }
   void SetWithErnie(bool with_ernie) { with_ernie_ = with_ernie; }
+  void SetWithInterleaved(bool with_interleaved) {
+    with_interleaved_ = with_interleaved;
+  }
   void ClearWeights() {
     for (auto& weight_pair : weight_map) {
@@ -480,6 +483,7 @@ class TensorRTEngine {
   bool use_oss() { return use_oss_; }
   bool with_ernie() { return with_ernie_; }
+  bool with_interleaved() { return with_interleaved_; }
   bool disable_trt_plugin_fp16() { return disable_trt_plugin_fp16_; }
   bool with_dynamic_shape() { return with_dynamic_shape_; }
   AnalysisConfig::Precision precision() { return precision_; }
@@ -612,6 +616,7 @@ class TensorRTEngine {
   bool use_dla_{false};
   int dla_core_{0};
   bool with_ernie_{false};
+  bool with_interleaved_{false};
   nvinfer1::ILogger& logger_;
   // max data size for the buffers.
......