From 9fa2eb387429c25f2ccbdf6969ab8886ed5ef6dc Mon Sep 17 00:00:00 2001
From: Hui Zhang
Date: Wed, 1 Feb 2023 15:20:42 +0800
Subject: [PATCH] jit layer support multi thread and fix predictor clone (#50095)

* jit layer support multi thread
* fix bug
* clone predictor does not run graph optimization
* format
* fix comment and format
* fix override and format
* fix
* fix
---
 .../fluid/inference/api/analysis_predictor.cc |  3 +
 paddle/fluid/jit/compilation_unit.cc          |  8 +++
 paddle/fluid/jit/compilation_unit.h           |  2 +
 paddle/fluid/jit/engine/base_engine.h         |  2 +
 paddle/fluid/jit/engine/interpreter_engine.cc |  9 ++-
 paddle/fluid/jit/engine/interpreter_engine.h  |  8 ++-
 paddle/fluid/jit/engine/predictor_engine.cc   | 17 ++++++
 paddle/fluid/jit/engine/predictor_engine.h    | 13 ++++-
 paddle/fluid/jit/layer.cc                     | 12 +++-
 paddle/fluid/jit/layer.h                      |  3 +
 paddle/fluid/jit/layer_test.cc                | 56 +++++++++++++++++++
 paddle/fluid/jit/serializer.cc                |  2 +
 12 files changed, 128 insertions(+), 7 deletions(-)

diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index e89bcfa2c6..49e18f9500 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -1086,6 +1086,7 @@ bool AnalysisPredictor::GetFetch(std::vector<PaddleTensor> *outputs,
 }
 
 void AnalysisPredictor::PrepareArgument() {
+  VLOG(3) << "AnalysisPredictor::PrepareArgument";
   // Init std::unique_ptr argument_.
   argument_.reset(new Argument);
   argument_->SetUseGPU(config_.use_gpu());
@@ -2246,10 +2247,12 @@ AnalysisPredictor::~AnalysisPredictor() {
 }
 
 std::unique_ptr<PaddlePredictor> AnalysisPredictor::Clone(void *stream) {
+  VLOG(3) << "AnalysisPredictor::Clone";
   std::lock_guard<std::mutex> lk(clone_mutex_);
   auto *x = new AnalysisPredictor(config_);
   x->status_is_cloned_ = true;
   x->root_predictor_id_ = this->root_predictor_id_;
+  x->config_.apply_optim_ = false;
   if (config_.use_external_stream_ && stream == nullptr) {
     PADDLE_THROW(platform::errors::InvalidArgument(
         "config has been configured to use external stream, but the Clone "
diff --git a/paddle/fluid/jit/compilation_unit.cc b/paddle/fluid/jit/compilation_unit.cc
index 0f241d864f..1a2351048f 100644
--- a/paddle/fluid/jit/compilation_unit.cc
+++ b/paddle/fluid/jit/compilation_unit.cc
@@ -38,5 +38,13 @@ void CompilationUnit::SetEngine(const std::string &name,
 
 const jit::EngineMap &CompilationUnit::EngineMap() const { return engine_map_; }
 
+std::shared_ptr<CompilationUnit> CompilationUnit::Clone(void *stream) {
+  auto x = std::make_shared<CompilationUnit>();
+  for (auto &it : engine_map_) {
+    x->SetEngine(it.first, std::move(it.second->Clone(stream)));
+  }
+  return x;
+}
+
 }  // namespace jit
 }  // namespace paddle
diff --git a/paddle/fluid/jit/compilation_unit.h b/paddle/fluid/jit/compilation_unit.h
index b862faa23f..25e725fe57 100644
--- a/paddle/fluid/jit/compilation_unit.h
+++ b/paddle/fluid/jit/compilation_unit.h
@@ -36,6 +36,8 @@ class CompilationUnit {
 
   const jit::EngineMap &EngineMap() const;
 
+  std::shared_ptr<CompilationUnit> Clone(void *stream = nullptr);
+
  private:
   jit::EngineMap engine_map_;
 };
diff --git a/paddle/fluid/jit/engine/base_engine.h b/paddle/fluid/jit/engine/base_engine.h
index eaf3c1221c..b6571d7ebd 100644
--- a/paddle/fluid/jit/engine/base_engine.h
+++ b/paddle/fluid/jit/engine/base_engine.h
@@ -29,6 +29,8 @@ class BaseEngine {
 
   virtual std::vector<Tensor> operator()(const std::vector<Tensor> &inputs) = 0;
 
+  virtual std::unique_ptr<BaseEngine> Clone(void *stream = nullptr) = 0;
+
   virtual ~BaseEngine() {}
 };
 
diff --git a/paddle/fluid/jit/engine/interpreter_engine.cc b/paddle/fluid/jit/engine/interpreter_engine.cc
index 410fd4dc01..36f8a2271d 100644
--- a/paddle/fluid/jit/engine/interpreter_engine.cc
+++ b/paddle/fluid/jit/engine/interpreter_engine.cc
@@ -28,14 +28,14 @@ namespace jit {
 InterpreterEngine::InterpreterEngine(const std::shared_ptr<FunctionInfo> &info,
                                      const VariableMap &params_dict,
                                      const phi::Place &place)
-    : info_(info), place_(place) {
+    : info_(info), params_dict_(params_dict), place_(place) {
   info_->RemoveDescFeedFetch();
   PADDLE_ENFORCE_GT(
       static_cast<int64_t>(info_->ProgramDesc().Block(0).OpSize()),
       0,
       platform::errors::PreconditionNotMet(
           "There is no operator in ProgramDesc."));
-  utils::ShareParamsIntoScope(info_->ParamNames(), params_dict, &scope_);
+  utils::ShareParamsIntoScope(info_->ParamNames(), params_dict_, &scope_);
   VLOG(6) << framework::GenScopeTreeDebugInfo(&scope_);
   CreateInterpreterCore();
 }
@@ -98,5 +98,10 @@ const std::shared_ptr<FunctionInfo> &InterpreterEngine::Info() const {
   return info_;
 }
 
+std::unique_ptr<BaseEngine> InterpreterEngine::Clone(void *stream) {
+  auto *x = new InterpreterEngine(info_, params_dict_, place_);
+  return std::unique_ptr<BaseEngine>(x);
+}
+
 }  // namespace jit
 }  // namespace paddle
diff --git a/paddle/fluid/jit/engine/interpreter_engine.h b/paddle/fluid/jit/engine/interpreter_engine.h
index 8c7f43f297..d7aa5d610a 100644
--- a/paddle/fluid/jit/engine/interpreter_engine.h
+++ b/paddle/fluid/jit/engine/interpreter_engine.h
@@ -43,14 +43,18 @@ class InterpreterEngine : public BaseEngine {
 
   void CreateInterpreterCore();
 
-  std::vector<Tensor> operator()(const std::vector<Tensor> &inputs);
+  std::vector<Tensor> operator()(const std::vector<Tensor> &inputs) override;
 
-  std::vector<DenseTensor> operator()(const std::vector<DenseTensor> &inputs);
+  std::vector<DenseTensor> operator()(
+      const std::vector<DenseTensor> &inputs) override;
 
   const std::shared_ptr<FunctionInfo> &Info() const;
 
+  std::unique_ptr<BaseEngine> Clone(void *stream = nullptr) override;
+
 private:
   std::shared_ptr<FunctionInfo> info_;
+  VariableMap params_dict_;
   framework::Scope scope_;
   phi::Place place_;
   std::shared_ptr<InterpreterCore> inner_interpreter_;
diff --git a/paddle/fluid/jit/engine/predictor_engine.cc b/paddle/fluid/jit/engine/predictor_engine.cc
index 6a44c192c1..bac6f993b0 100644
--- a/paddle/fluid/jit/engine/predictor_engine.cc
+++ b/paddle/fluid/jit/engine/predictor_engine.cc
@@ -55,6 +55,17 @@ PredictorEngine::PredictorEngine(const std::shared_ptr<FunctionInfo> &info,
       scope_, std::make_shared<framework::ProgramDesc>(info_->ProgramDesc()));
 }
 
+PredictorEngine::PredictorEngine(
+    const std::shared_ptr<FunctionInfo> &info,
+    const std::shared_ptr<framework::Scope> &scope,
+    const phi::Place &place,
+    const std::shared_ptr<PaddlePredictor> &predictor)
+    : info_(info),
+      scope_(scope),
+      place_(place),
+      predictor_(std::dynamic_pointer_cast<AnalysisPredictor>(
+          predictor)) {}
+
 std::vector<Tensor> PredictorEngine::operator()(
     const std::vector<Tensor> &inputs) {
   auto dense_tensors = utils::ToDenseTensors(inputs);
@@ -188,5 +199,11 @@ static bool PaddleTensorToDenseTensor(const PaddleTensor &pt,
   return true;
 }
 
+std::unique_ptr<BaseEngine> PredictorEngine::Clone(void *stream) {
+  auto *x = new PredictorEngine(
+      info_, scope_, place_, std::move(predictor_->Clone(stream)));
+  return std::unique_ptr<BaseEngine>(x);
+}
+
 }  // namespace jit
 }  // namespace paddle
diff --git a/paddle/fluid/jit/engine/predictor_engine.h b/paddle/fluid/jit/engine/predictor_engine.h
index 026b012cbf..ad07a7a7ff 100644
--- a/paddle/fluid/jit/engine/predictor_engine.h
+++ b/paddle/fluid/jit/engine/predictor_engine.h
@@ -20,6 +20,7 @@
 namespace paddle {
 
 class AnalysisPredictor;
+class PaddlePredictor;
 
 namespace framework {
 class Scope;
@@ -33,11 +34,19 @@ class PredictorEngine : public BaseEngine {
                   const VariableMap &params_dict,
                   const phi::Place &place);
 
+  PredictorEngine(const std::shared_ptr<FunctionInfo> &info,
+                  const std::shared_ptr<framework::Scope> &scope,
+                  const phi::Place &place,
+                  const std::shared_ptr<PaddlePredictor> &predictor);
+
   ~PredictorEngine() noexcept {}
 
-  std::vector<Tensor> operator()(const std::vector<Tensor> &inputs);
+  std::vector<Tensor> operator()(const std::vector<Tensor> &inputs) override;
+
+  std::vector<DenseTensor> operator()(
+      const std::vector<DenseTensor> &inputs) override;
 
-  std::vector<DenseTensor> operator()(const std::vector<DenseTensor> &inputs);
+  std::unique_ptr<BaseEngine> Clone(void *stream = nullptr) override;
 
 private:
  std::shared_ptr<FunctionInfo> info_;
diff --git a/paddle/fluid/jit/layer.cc b/paddle/fluid/jit/layer.cc
index 75a7e282e6..332c53a8e3 100644
--- a/paddle/fluid/jit/layer.cc
+++ b/paddle/fluid/jit/layer.cc
@@ -30,7 +30,10 @@ Layer::Layer(const VariableMap& params_map,
              const VariableMap& attrs_map,
              const FunctionInfoMap& info_map,
              const phi::Place& place)
-    : params_map_(params_map), attrs_map_(attrs_map), info_map_(info_map) {
+    : params_map_(params_map),
+      attrs_map_(attrs_map),
+      info_map_(info_map),
+      place_(place) {
   unit_.reset(new CompilationUnit());
 }
 
@@ -94,5 +97,12 @@ PD_SPECIALZE_ATTRIBUTE_TYPE(std::vector)
 PD_SPECIALZE_ATTRIBUTE_TYPE(std::vector)
 PD_SPECIALZE_ATTRIBUTE_TYPE(std::vector)
 
+std::shared_ptr<Layer> Layer::Clone(void* stream) {
+  std::shared_ptr<Layer> x =
+      std::make_shared<Layer>(params_map_, attrs_map_, info_map_, place_);
+  x->unit_ = unit_->Clone(stream);
+  return x;
+}
+
 }  // namespace jit
 }  // namespace paddle
diff --git a/paddle/fluid/jit/layer.h b/paddle/fluid/jit/layer.h
index dd5ff5d9f9..ed8b739a0b 100644
--- a/paddle/fluid/jit/layer.h
+++ b/paddle/fluid/jit/layer.h
@@ -67,10 +67,13 @@ class Layer {
 
   std::vector<std::string> FunctionNames() const;
 
+  std::shared_ptr<Layer> Clone(void* stream = nullptr);
+
 private:
  VariableMap params_map_;
  VariableMap attrs_map_;
  FunctionInfoMap info_map_;
+  phi::Place place_;
  std::shared_ptr<CompilationUnit> unit_;
 };
 
diff --git a/paddle/fluid/jit/layer_test.cc b/paddle/fluid/jit/layer_test.cc
index 4e367d8cc1..c163f3c50d 100644
--- a/paddle/fluid/jit/layer_test.cc
+++ b/paddle/fluid/jit/layer_test.cc
@@ -20,6 +20,7 @@
 
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/variable.h"
+#include "paddle/fluid/platform/timer.h"
 #include "paddle/phi/api/include/api.h"
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/kernel_registry.h"
@@ -78,7 +79,11 @@ TEST(CpuLayerTest, Function) {
 TEST(CpuLayerTest, Construct) {
   auto place = phi::CPUPlace();
   std::string path = "./multi_program_load/export";
+  paddle::platform::Timer timer;
+  timer.Start();
   auto layer = jit::Load(path, place);
+  timer.Pause();
+  std::cout << "jit::Load cost " << timer.ElapsedMS() << " ms" << std::endl;
 
   float fbias = layer.Attribute<float>("fbias");
   EXPECT_FLOAT_EQ(fbias, 1.4);
@@ -119,6 +124,41 @@ TEST(CpuLayerTest, Construct) {
   EXPECT_NEAR(out_data[0], pow(1.41562390, 2.0), 1e-6);
 }
 
+TEST(CpuLayerTest, Clone) {
+  auto place = phi::CPUPlace();
+  std::string path = "./multi_program_load/export";
+
+  paddle::platform::Timer timer;
+  timer.Start();
+  auto layer = jit::Load(path, place);
+  timer.Pause();
+  std::cout << "jit::Load cost " << timer.ElapsedMS() << " ms" << std::endl;
+
+  timer.Start();
+  auto layer2 = layer.Clone();
+  timer.Pause();
+  std::cout << "jit::Layer::Clone cost " << timer.ElapsedMS() << " ms"
+            << std::endl;
+
+  float fbias = layer2->Attribute<float>("fbias");
+  EXPECT_FLOAT_EQ(fbias, 1.4);
+
+  auto inputs = PrepareInputs(place);
+  auto outs = layer2->forward(inputs);
+  auto out_data = outs[0].data<float>();
+  EXPECT_NEAR(out_data[0], 0.02194316, 1e-6);
+
+  auto func = layer2->Function("infer");
+  EXPECT_TRUE(func.IsValid());
+  outs = func(inputs);
+  out_data = outs[0].data<float>();
+  EXPECT_NEAR(out_data[0], 1.41562390, 1e-6);
+  auto pow_out =
+      paddle::experimental::pow(outs[0], paddle::experimental::Scalar(2));
+  out_data = pow_out.data<float>();
+  EXPECT_NEAR(out_data[0], pow(1.41562390, 2.0), 1e-6);
+}
+
 #if defined(PADDLE_WITH_CUDA)
 TEST(GpuLayerTest, Construct) {
   auto place = phi::GPUPlace();
@@ -147,6 +187,22 @@ TEST(GpuLayerTest, Construct) {
   out_data = cpu_tensor.data<float>();
   EXPECT_NEAR(out_data[0], sqrt(1.41562390), 1e-6);
 }
+
+TEST(GpuLayerTest, Clone) {
+  auto place = phi::GPUPlace();
+
+  std::string path = "./multi_program_load/export";
+  auto layer = jit::Load(path, place);
+  auto inputs = PrepareInputs(place);
+
+  auto layer2 = layer.Clone();
+  auto outs = layer2->forward(inputs);
+  auto gpu_tensor = outs[0];
+  auto cpu_tensor =
+      paddle::experimental::copy_to(gpu_tensor, phi::CPUPlace(), true);
+  auto out_data = cpu_tensor.data<float>();
+  EXPECT_NEAR(out_data[0], 0.02194316, 1e-6);
+}
 #endif
 
 }  // namespace jit
diff --git a/paddle/fluid/jit/serializer.cc b/paddle/fluid/jit/serializer.cc
index 0a7fdc0e35..436717a8dc 100644
--- a/paddle/fluid/jit/serializer.cc
+++ b/paddle/fluid/jit/serializer.cc
@@ -30,8 +30,10 @@ DECLARE_string(jit_engine_type);
 
 namespace paddle {
 namespace jit {
+
 using FunctionInfoMap =
     std::unordered_map<std::string, std::shared_ptr<FunctionInfo>>;
+
 Layer Deserializer::operator()(const std::string& path,
                                const phi::Place& place) {
   const auto& pdmodel_paths = utils::PdmodelFilePaths(path);
-- 
GitLab
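
Usage sketch of the multi-thread pattern this patch enables, assuming the same exported model path and a PrepareInputs() helper as in layer_test.cc above (both are placeholders here, not APIs defined by the patch): load the jit::Layer once, then give every worker thread its own Layer::Clone() so interpreter/predictor state is never shared, and call forward() independently per thread.

    // Hypothetical driver, not taken from the patch; only jit::Load,
    // Layer::Clone and Layer::forward come from the code above.
    #include <memory>
    #include <thread>
    #include <vector>

    #include "paddle/fluid/jit/layer.h"
    #include "paddle/fluid/jit/serializer.h"

    void RunLayerOnManyThreads() {
      auto place = phi::CPUPlace();
      // Deserialize the program and parameters once.
      auto layer = paddle::jit::Load("./multi_program_load/export", place);

      std::vector<std::thread> workers;
      for (int i = 0; i < 4; ++i) {
        // Layer::Clone() clones every engine in the CompilationUnit, so each
        // thread owns its own scopes and predictor/interpreter instances.
        std::shared_ptr<paddle::jit::Layer> local = layer.Clone();
        workers.emplace_back([local, place]() {
          auto inputs = PrepareInputs(place);  // placeholder input builder
          auto outputs = local->forward(inputs);
          (void)outputs;
        });
      }
      for (auto &t : workers) {
        t.join();
      }
    }

Because the cloned AnalysisPredictor sets apply_optim_ to false (see the analysis_predictor.cc hunk), clones skip the graph-optimization pass already performed by the root predictor, which keeps per-thread Clone() cheap.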