diff --git a/paddle/fluid/framework/ir/delete_fill_constant_op_pass.cc b/paddle/fluid/framework/ir/delete_fill_constant_op_pass.cc
index 6104de7ab8a6b36fb96c8311ae1d8d3ad04f466c..203c48956809e01d3e04e09a45b056e1656baffa 100644
--- a/paddle/fluid/framework/ir/delete_fill_constant_op_pass.cc
+++ b/paddle/fluid/framework/ir/delete_fill_constant_op_pass.cc
@@ -29,6 +29,11 @@ void FillConstData(phi::DenseTensor* out_t, T value) {
 }
 
 void DeleteFillConstantOpPass::ApplyImpl(ir::Graph* graph) const {
+  bool with_dynamic_shape = Get<bool>("with_dynamic_shape");
+  // Not supported under dynamic shape.
+  if (with_dynamic_shape) {
+    return;
+  }
   FusePassBase::Init("delete_fill_constant_op_pass", graph);
   GraphPatternDetector detector;
   auto fill_constant_op =
diff --git a/paddle/fluid/framework/ir/float_to_half_pass.cc b/paddle/fluid/framework/ir/float_to_half_pass.cc
index ec94728fb3c64175bdf44eab7ed2683f1ba4ce75..9389490712c65b1df46d1ecfebf433203d4d5a10 100644
--- a/paddle/fluid/framework/ir/float_to_half_pass.cc
+++ b/paddle/fluid/framework/ir/float_to_half_pass.cc
@@ -16,7 +16,12 @@
 
 #include "paddle/fluid/framework/ir/graph_helper.h"
 #include "paddle/fluid/framework/operator.h"
-#include "paddle/phi/common/data_type.h"
+#include "paddle/phi/common/bfloat16.h"
+#include "paddle/phi/common/float16.h"
+#include "paddle/phi/common/place.h"
+#include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/core/enforce.h"
+#include "paddle/phi/core/errors.h"
 
 namespace paddle {
 namespace framework {
@@ -620,34 +625,45 @@ void FloatToHalfPass::ConvertWeightsData() const {
   for (const auto& var_name : var_names) {
     if (vars_convert_to_half_.count(var_name)) {
       VLOG(4) << var_name << "'s data type was convert to half";
 
-#define CONVERT_TENSOR_DTYPE(DTYPE, dtype)                                 \
-  half_tensor.set_type(DTYPE);                                             \
-  auto* half_data = half_tensor.mutable_data<dtype>(platform::CPUPlace()); \
-  for (int64_t i = 0; i < origin_tensor->numel(); i++) {                   \
-    half_data[i] = static_cast<dtype>(origin_data[i]);                     \
-  }                                                                        \
-  origin_tensor->clear();                                                  \
-  paddle::framework::TensorCopySync(                                       \
-      half_tensor, platform::CPUPlace(), origin_tensor)
 
       auto* var = scope->FindLocalVar(var_name);
-
-      if (var->IsType<phi::DenseTensor>()) {
-        auto* origin_tensor = var->GetMutable<phi::DenseTensor>();
-        phi::DenseTensor half_tensor;
-        half_tensor.Resize(origin_tensor->dims());
-        auto* origin_data =
-            origin_tensor->mutable_data<float>(platform::CPUPlace());
-        if (half_precision_ == phi::DataType::FLOAT16) {
-          CONVERT_TENSOR_DTYPE(paddle::experimental::DataType::FLOAT16,
-                               phi::dtype::float16);
-        } else if (half_precision_ == phi::DataType::BFLOAT16) {
-          CONVERT_TENSOR_DTYPE(paddle::experimental::DataType::BFLOAT16,
-                               phi::dtype::bfloat16);
+      CHECK_EQ(var->IsType<phi::DenseTensor>(), true);
+
+      auto* origin_tensor = var->GetMutable<phi::DenseTensor>();
+
+      phi::DenseTensor half_tensor;
+      half_tensor.Resize(origin_tensor->dims());
+      half_tensor.set_type(half_precision_);
+
+      if (half_precision_ == phi::DataType::FLOAT16) {
+        auto* half_data =
+            half_tensor.mutable_data<phi::dtype::float16>(phi::CPUPlace{});
+        for (int64_t i = 0; i < origin_tensor->numel(); i++) {
+          if (origin_tensor->dtype() == phi::DataType::FLOAT64) {
+            auto* origin_data = origin_tensor->data<double>();
+            half_data[i] = static_cast<phi::dtype::float16>(origin_data[i]);
+          } else if (origin_tensor->dtype() == phi::DataType::FLOAT32) {
+            auto* origin_data = origin_tensor->data<float>();
+            half_data[i] = static_cast<phi::dtype::float16>(origin_data[i]);
+          }
+        }
+      } else if (half_precision_ == phi::DataType::BFLOAT16) {
+        auto* half_data =
+            half_tensor.mutable_data<phi::dtype::bfloat16>(phi::CPUPlace{});
+        for (int64_t i = 0; i < origin_tensor->numel(); i++) {
+          if (origin_tensor->dtype() == phi::DataType::FLOAT64) {
+            auto* origin_data = origin_tensor->data<double>();
+            half_data[i] = static_cast<phi::dtype::bfloat16>(origin_data[i]);
+          } else if (origin_tensor->dtype() == phi::DataType::FLOAT32) {
+            auto* origin_data = origin_tensor->data<float>();
+            half_data[i] = static_cast<phi::dtype::bfloat16>(origin_data[i]);
+          }
         }
       }
+      origin_tensor->clear();
+      paddle::framework::TensorCopySync(
+          half_tensor, phi::CPUPlace{}, origin_tensor);
     }
-#undef CONVERT_TENSOR_DTYPE
   }
 }
diff --git a/paddle/fluid/framework/ir/float_to_half_pass.h b/paddle/fluid/framework/ir/float_to_half_pass.h
index a274dc9a53c61a1490c96d60ba96e49608fe446b..1af59f5fbc30dca1dcb5bcd3efa391d15ca69bdb 100644
--- a/paddle/fluid/framework/ir/float_to_half_pass.h
+++ b/paddle/fluid/framework/ir/float_to_half_pass.h
@@ -22,9 +22,6 @@
 #include "paddle/fluid/framework/ir/node.h"
 #include "paddle/phi/common/backend.h"
 #include "paddle/phi/common/data_type.h"
-#include "paddle/phi/common/float16.h"
-#include "paddle/phi/common/layout.h"
-#include "paddle/phi/common/place.h"
 
 namespace paddle {
 namespace framework {
diff --git a/paddle/fluid/framework/ir/map_depthwise_conv_to_conv_pass.cc b/paddle/fluid/framework/ir/map_depthwise_conv_to_conv_pass.cc
index 341fedcd4bacd56f3e478d31c12827f31e69a8c7..9aeb74584dba656291363b23099c6742dbe1f1e2 100644
--- a/paddle/fluid/framework/ir/map_depthwise_conv_to_conv_pass.cc
+++ b/paddle/fluid/framework/ir/map_depthwise_conv_to_conv_pass.cc
@@ -41,6 +41,7 @@ void MapDepthwiseConv2ConvPass::ApplyImpl(ir::Graph* graph) const {
     std::string op_type = op_desc->Type();
     if (!replaced_map.count(op_type)) continue;
     op_desc->SetType(replaced_map[op_type]);
+    op_desc->SetAttr("use_cudnn", true);
     op_desc->Flush();
     ++found_count;
   }
diff --git a/paddle/fluid/inference/analysis/ir_pass_manager.cc b/paddle/fluid/inference/analysis/ir_pass_manager.cc
index 25b371cb2ff39ebab23595e0a858181ac334d760..f84ed64e7009af3da0cf054eb9d0a313ff0c1414 100644
--- a/paddle/fluid/inference/analysis/ir_pass_manager.cc
+++ b/paddle/fluid/inference/analysis/ir_pass_manager.cc
@@ -27,6 +27,7 @@
 #include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/inference/analysis/argument.h"
 #include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/core/errors.h"
 
 namespace paddle {
 namespace inference {
@@ -305,42 +306,18 @@ void IRPassManager::CreatePasses(Argument *argument,
 }
 
 std::unique_ptr<Graph> IRPassManager::Apply(std::unique_ptr<Graph> graph) {
-  if (passes_.empty()) {
-    return graph;
-  }
   PADDLE_ENFORCE_NOT_NULL(
-      graph.get(),
-      platform::errors::PreconditionNotMet("Graph cannot be NULL."));
+      graph.get(), platform::errors::InvalidArgument("Graph cannot be null."));
   // Apply all the passes
   for (const auto &pass : passes_) {
     if (pass->Type() != "graph_viz_pass" && !disable_logs_) {
       PrettyLogEndl(Style::H2(), "--- Running IR pass [%s]", pass->Type());
     }
-    // delete_fill_constant_op_pass is not apply under trt dynamic shape
-    if (pass->Type() == "delete_fill_constant_op_pass") {
-      bool use_dynamic = pass->Get<bool>("with_dynamic_shape");
-      if (use_dynamic) continue;
-    }
     graph.reset(pass->Apply(graph.release()));
   }
   return graph;
 }
 
-framework::proto::ProgramDesc IRPassManager::AcquireProgram(
-    std::unique_ptr<Graph> *graph, ProgramDesc *program) const {
-  auto pass =
-      framework::ir::PassRegistry::Instance().Get("graph_to_program_pass");
-
-  // Direct using ProgramDesc desc(argument->main_program()) may cause
-  // incomplete copies of information.
-  ProgramDesc desc;
-  desc.CopyFrom(*program->Proto());
-  pass->SetNotOwned("program", &desc);
-  auto *the_graph = graph->release();
-  graph->reset(pass->Apply(the_graph));
-  return *desc.Proto();
-}
-
 }  // namespace analysis
 }  // namespace inference
 }  // namespace paddle
diff --git a/paddle/fluid/inference/analysis/ir_pass_manager.h b/paddle/fluid/inference/analysis/ir_pass_manager.h
index 9f9a5fc347123b91cd86db495c2b2e0c7a6a6518..c56d3d40f54defdf7c0696762487e237fdbdd3ee 100644
--- a/paddle/fluid/inference/analysis/ir_pass_manager.h
+++ b/paddle/fluid/inference/analysis/ir_pass_manager.h
@@ -48,15 +48,9 @@ class IRPassManager final {
   std::unique_ptr<Graph> Apply(std::unique_ptr<Graph> graph);
 
-  framework::proto::ProgramDesc AcquireProgram(std::unique_ptr<Graph> *graph,
-                                               ProgramDesc *program) const;
-
-  framework::ir::Graph &graph() const { return *graph_; }
-
  private:
   void CreatePasses(Argument *argument,
                     const std::vector<std::string> &passes);
 
-  std::unique_ptr<Graph> graph_;
   std::vector<std::unique_ptr<Pass>> passes_;
   bool disable_logs_{false};
 };
diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc
index 17afc4f840e7dcec20e1562232ebaebfde1771be..c7554de9df73b26f2b11a040940fbe91f557d612 100644
--- a/paddle/fluid/inference/api/analysis_config.cc
+++ b/paddle/fluid/inference/api/analysis_config.cc
@@ -108,6 +108,7 @@ void AnalysisConfig::EnableUseGpu(uint64_t memory_pool_init_size_mb,
   }
 #else
   LOG(ERROR) << "Please use PaddlePaddle with GPU version.";
+  use_gpu_ = false;
 #endif
 
   Update();
@@ -299,7 +300,7 @@ void AnalysisConfig::LoadIpuConfig(const std::string &config_path) {
 
     if (ipu_config_mapper_.find(key) == ipu_config_mapper_.end()) {
       PADDLE_THROW(platform::errors::InvalidArgument(
-          "invalid key {} in IPU config: ", key));
+          "invalid key %s in IPU config: ", key));
     }
     switch (ipu_config_mapper_.at(key)) {
       case ipu_config_code::ipu_device_num:
@@ -335,10 +336,9 @@ void AnalysisConfig::LoadIpuConfig(const std::string &config_path) {
       case ipu_config_code::ipu_enable_model_runtime_executor:
         ipu_enable_model_runtime_executor_ = string2bool(value);
         break;
-
       default:
         PADDLE_THROW(platform::errors::InvalidArgument(
-            "invalid key {} in IPU config", key));
+            "invalid key %s in IPU config", key));
         break;
     }
   }
@@ -1438,7 +1438,7 @@ bool AnalysisConfig::trt_allow_build_at_runtime() const {
   return trt_allow_build_at_runtime_;
 }
 
-void AnalysisConfig::Exp_DisableMixedInferOps(
+void AnalysisConfig::Exp_DisableMixedPrecisionOps(
     const std::unordered_set<std::string> &black_list) {
   mixed_black_list_ = black_list;
 }
diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h
index 5bf5d3de7b0f00dac516a31d1c3baec7601978da..b4c5a0d293574dbe070eb4c50aeab956032ac1f4 100644
--- a/paddle/fluid/inference/api/paddle_analysis_config.h
+++ b/paddle/fluid/inference/api/paddle_analysis_config.h
@@ -1009,7 +1009,7 @@ struct PD_INFER_DECL AnalysisConfig {
   /// interface is in the experimental stage and may change in the future. Note
   /// that the blacklist must be the same as the model conversion blacklist.
   ///
-  void Exp_DisableMixedInferOps(
+  void Exp_DisableMixedPrecisionOps(
       const std::unordered_set<std::string>& black_list);
 
   void SetApplyOptim(bool value) { apply_optim_ = value; }
diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt
index 7398b9c2c01361ada531505e71563fad42172ab4..a5cdfda3243eb469418a87709fd851f089f37478 100644
--- a/paddle/fluid/inference/tests/api/CMakeLists.txt
+++ b/paddle/fluid/inference/tests/api/CMakeLists.txt
@@ -418,7 +418,7 @@ if(WITH_GPU)
                                   analyzer_ernie_tester.cc)
     inference_analysis_api_test(gpu_ernie_half_test ${ERNIE_INSTALL_DIR}
                                 gpu_ernie_half_test.cc)
-    set_tests_properties(gpu_ernie_half_test PROPERTIES TIMEOUT 40)
+    set_tests_properties(gpu_ernie_half_test PROPERTIES TIMEOUT 60)
   endif()
   inference_analysis_api_int8_test(test_analyzer_ernie_int8 ${ERNIE_INSTALL_DIR}
                                    analyzer_ernie_int8_tester.cc)
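For reviewers, a minimal sketch of calling the renamed entry point from user code; the include path mirrors the header touched above, while the model directory, GPU pool size, and the op names in the blacklist are illustrative assumptions, not part of this patch:

    #include <string>
    #include <unordered_set>

    #include "paddle/fluid/inference/api/paddle_analysis_config.h"

    int main() {
      paddle::AnalysisConfig config;
      config.SetModel("./ernie_model");        // illustrative model directory
      config.EnableUseGpu(100 /* MB */, 0 /* device id */);
      // Ops listed here stay in FP32 when mixed precision is enabled; the set
      // must match the blacklist used when the model was converted.
      config.Exp_DisableMixedPrecisionOps({"elementwise_add", "softmax"});
      return 0;
    }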