未验证 提交 4b3ac86d 编写于 作者: Y Yuanle Liu 提交者: GitHub

[Inference] save_optimized_model_pass support gpu (#55551)

* fix cudnn 8.7+ bug on cudnnConvolutionBiasActivationForward

* save_optimized_model_pass support gpu
上级 45d49619
......@@ -55,6 +55,9 @@ void SaveOptimizedModelPass::SaveOptimizedModel(Argument* argument) {
auto* graph = argument->main_graph_ptr();
framework::ProgramDesc optimized_program_desc;
// NOTE(liuyuanle): If the following line of code is not added, a
// segmentation fault may occur!
optimized_program_desc.CopyFrom(*argument->main_program().Proto());
framework::ir::GraphToProgram(*graph, &optimized_program_desc);
auto IsPersistable = [](const framework::VarDesc* var) {
......@@ -130,7 +133,9 @@ void SaveOptimizedModelPass::SaveOptimizedModel(Argument* argument) {
}
void SaveOptimizedModelPass::RunImpl(Argument* argument) {
if (argument->use_xpu_valid()) {
// TODO(inference): Support trt.
if (argument->use_xpu() ||
(argument->use_gpu() && !argument->use_tensorrt())) {
SaveOptimizedModel(argument);
}
}
......
......@@ -1379,6 +1379,7 @@ void AnalysisPredictor::PrepareArgument() {
argument_->SetOptimInputShape(config_.optim_input_shape_);
argument_->SetTensorRtTunedDynamicShape(
config_.tuned_tensorrt_dynamic_shape());
argument_->SetUseTensorRT(false);
if (config_.use_gpu() && config_.tensorrt_engine_enabled()) {
LOG(INFO) << "TensorRT subgraph engine is enabled";
argument_->SetUseTensorRT(true);
......
......@@ -181,8 +181,6 @@ class PD_INFER_DECL PassStrategy : public PaddlePassBuilder {
bool use_ipu_{false};
bool use_mkldnn_{false};
bool use_custom_device_{false};
bool use_gpu_low_precision_{false};
/// \endcond
};
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册