Unverified · Commit 9fa2eb38 · Authored by Hui Zhang · Committed by GitHub

jit layer support multi thread and fix predictor clone (#50095)

* jit layer support multi thread

* fix bug

* clone predictor without re-running the graph optimizer

* format

* fix comment and format

* fix override and format

* fix

* fix
Parent c62657b3
paddle/fluid/inference/api/analysis_predictor.cc
@@ -1086,6 +1086,7 @@ bool AnalysisPredictor::GetFetch(std::vector<PaddleTensor> *outputs,
 }
 
 void AnalysisPredictor::PrepareArgument() {
+  VLOG(3) << "AnalysisPredictor::PrepareArgument";
   // Init std::unique_ptr argument_.
   argument_.reset(new Argument);
   argument_->SetUseGPU(config_.use_gpu());
@@ -2246,10 +2247,12 @@ AnalysisPredictor::~AnalysisPredictor() {
 }
 
 std::unique_ptr<PaddlePredictor> AnalysisPredictor::Clone(void *stream) {
+  VLOG(3) << "AnalysisPredictor::Clone";
   std::lock_guard<std::mutex> lk(clone_mutex_);
   auto *x = new AnalysisPredictor(config_);
   x->status_is_cloned_ = true;
   x->root_predictor_id_ = this->root_predictor_id_;
+  x->config_.apply_optim_ = false;
   if (config_.use_external_stream_ && stream == nullptr) {
     PADDLE_THROW(platform::errors::InvalidArgument(
         "config has been configured to use external stream, but the Clone "
...
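The behavioral core of the predictor change is the new `x->config_.apply_optim_ = false;` line: per the commit message, a cloned predictor no longer re-runs graph optimization, so Clone becomes cheap enough to call once per worker thread. A minimal usage sketch against the public paddle_infer API (the model paths and thread count are placeholders, not part of this patch):

// Sketch: one cloned predictor per worker thread, reusing the root
// predictor's already-optimized program. Paths below are placeholders.
#include <thread>
#include <vector>
#include "paddle_inference_api.h"

int main() {
  paddle_infer::Config config;
  config.SetModel("./model/model.pdmodel", "./model/model.pdiparams");
  auto root = paddle_infer::CreatePredictor(config);  // optimizes the graph once

  std::vector<std::thread> workers;
  for (int i = 0; i < 4; ++i) {
    workers.emplace_back([&root] {
      // Clone() is serialized by clone_mutex_ and, after this patch,
      // skips graph optimization (apply_optim_ = false).
      auto predictor = root->Clone();
      // ... feed inputs and call predictor->Run() ...
    });
  }
  for (auto &t : workers) t.join();
  return 0;
}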
paddle/fluid/jit/compilation_unit.cc
@@ -38,5 +38,13 @@ void CompilationUnit::SetEngine(const std::string &name,
 
 const jit::EngineMap &CompilationUnit::EngineMap() const { return engine_map_; }
 
+std::shared_ptr<CompilationUnit> CompilationUnit::Clone(void *stream) {
+  auto x = std::make_shared<CompilationUnit>();
+  for (auto &it : engine_map_) {
+    x->SetEngine(it.first, std::move(it.second->Clone(stream)));
+  }
+  return x;
+}
+
 }  // namespace jit
 }  // namespace paddle
paddle/fluid/jit/compilation_unit.h
@@ -36,6 +36,8 @@ class CompilationUnit {
 
   const jit::EngineMap &EngineMap() const;
 
+  std::shared_ptr<CompilationUnit> Clone(void *stream = nullptr);
+
  private:
   jit::EngineMap engine_map_;
 };
...
paddle/fluid/jit/engine/base_engine.h
@@ -29,6 +29,8 @@ class BaseEngine {
 
   virtual std::vector<Tensor> operator()(const std::vector<Tensor> &inputs) = 0;
 
+  virtual std::unique_ptr<BaseEngine> Clone(void *stream = nullptr) = 0;
+
   virtual ~BaseEngine() {}
 };
...
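The pure virtual `Clone` on `BaseEngine` is a prototype pattern: every concrete engine knows how to duplicate itself, so `CompilationUnit::Clone` above can deep-copy its whole `engine_map_` without knowing any concrete engine type. A self-contained sketch of the pattern (all names here are illustrative, not Paddle's):

// Illustrative prototype pattern: each engine duplicates itself through
// the base interface.
#include <memory>
#include <string>
#include <unordered_map>

struct Engine {
  virtual std::unique_ptr<Engine> Clone(void *stream = nullptr) = 0;
  virtual ~Engine() = default;
};

struct FakeEngine : Engine {
  std::unique_ptr<Engine> Clone(void * /*stream*/ = nullptr) override {
    return std::make_unique<FakeEngine>(*this);  // fresh, independent copy
  }
};

// A map of engines can then be deep-copied generically, which is the shape
// of CompilationUnit::Clone over engine_map_.
std::unordered_map<std::string, std::shared_ptr<Engine>> CloneAll(
    const std::unordered_map<std::string, std::shared_ptr<Engine>> &src) {
  std::unordered_map<std::string, std::shared_ptr<Engine>> dst;
  for (const auto &kv : src) dst[kv.first] = kv.second->Clone();
  return dst;
}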
paddle/fluid/jit/engine/interpreter_engine.cc
@@ -28,14 +28,14 @@ namespace jit {
 
 InterpreterEngine::InterpreterEngine(const std::shared_ptr<FunctionInfo> &info,
                                      const VariableMap &params_dict,
                                      const phi::Place &place)
-    : info_(info), place_(place) {
+    : info_(info), params_dict_(params_dict), place_(place) {
   info_->RemoveDescFeedFetch();
   PADDLE_ENFORCE_GT(
       static_cast<int64_t>(info_->ProgramDesc().Block(0).OpSize()),
       0,
       platform::errors::PreconditionNotMet(
           "There is no operator in ProgramDesc."));
-  utils::ShareParamsIntoScope(info_->ParamNames(), params_dict, &scope_);
+  utils::ShareParamsIntoScope(info_->ParamNames(), params_dict_, &scope_);
   VLOG(6) << framework::GenScopeTreeDebugInfo(&scope_);
   CreateInterpreterCore();
 }
@@ -98,5 +98,10 @@ const std::shared_ptr<FunctionInfo> &InterpreterEngine::Info() const {
   return info_;
 }
 
+std::unique_ptr<BaseEngine> InterpreterEngine::Clone(void *stream) {
+  auto *x = new InterpreterEngine(info_, params_dict_, place_);
+  return std::unique_ptr<BaseEngine>(x);
+}
+
 }  // namespace jit
 }  // namespace paddle
paddle/fluid/jit/engine/interpreter_engine.h
@@ -43,14 +43,18 @@ class InterpreterEngine : public BaseEngine {
 
   void CreateInterpreterCore();
 
-  std::vector<Tensor> operator()(const std::vector<Tensor> &inputs);
+  std::vector<Tensor> operator()(const std::vector<Tensor> &inputs) override;
 
-  std::vector<DenseTensor> operator()(const std::vector<DenseTensor> &inputs);
+  std::vector<DenseTensor> operator()(
+      const std::vector<DenseTensor> &inputs) override;
 
   const std::shared_ptr<FunctionInfo> &Info() const;
 
+  std::unique_ptr<BaseEngine> Clone(void *stream = nullptr) override;
+
  private:
   std::shared_ptr<FunctionInfo> info_;
+  VariableMap params_dict_;
   framework::Scope scope_;
   phi::Place place_;
   std::shared_ptr<framework::InterpreterCore> inner_interpreter_;
...
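Promoting `params_dict_` to a member is what makes `InterpreterEngine::Clone` a one-liner: the clone simply re-runs the constructor with the cached arguments, so each copy builds its own `Scope` and `InterpreterCore` while the parameter variables stay shared. A minimal sketch of this cache-the-constructor-arguments cloning style (the types are stand-ins, not Paddle's):

// Stand-in types; in Paddle these would be FunctionInfo, VariableMap, etc.
#include <memory>
#include <string>
#include <unordered_map>

using ParamsDict = std::unordered_map<std::string, std::shared_ptr<int>>;

class MiniEngine {
 public:
  MiniEngine(std::string program, ParamsDict params)
      : program_(std::move(program)), params_(std::move(params)) {
    // Build per-instance execution state here (scope, interpreter, ...).
  }

  // Clone == rebuild from the cached constructor arguments. Parameters are
  // shared_ptrs, so weights are shared; execution state is per-clone.
  std::unique_ptr<MiniEngine> Clone() const {
    return std::make_unique<MiniEngine>(program_, params_);
  }

 private:
  std::string program_;
  ParamsDict params_;
};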
paddle/fluid/jit/engine/predictor_engine.cc
@@ -55,6 +55,17 @@ PredictorEngine::PredictorEngine(const std::shared_ptr<FunctionInfo> &info,
       scope_, std::make_shared<framework::ProgramDesc>(info_->ProgramDesc()));
 }
 
+PredictorEngine::PredictorEngine(
+    const std::shared_ptr<FunctionInfo> &info,
+    const std::shared_ptr<framework::Scope> &scope,
+    const phi::Place &place,
+    const std::shared_ptr<PaddlePredictor> &predictor)
+    : info_(info),
+      scope_(scope),
+      place_(place),
+      predictor_(std::dynamic_pointer_cast<AnalysisPredictor, PaddlePredictor>(
+          predictor)) {}
+
 std::vector<Tensor> PredictorEngine::operator()(
     const std::vector<Tensor> &inputs) {
   auto dense_tensors = utils::ToDenseTensors(inputs);
@@ -188,5 +199,11 @@ static bool PaddleTensorToDenseTensor(const PaddleTensor &pt,
   return true;
 }
 
+std::unique_ptr<BaseEngine> PredictorEngine::Clone(void *stream) {
+  auto *x = new PredictorEngine(
+      info_, scope_, place_, std::move(predictor_->Clone(stream)));
+  return std::unique_ptr<BaseEngine>(x);
+}
+
 }  // namespace jit
 }  // namespace paddle
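`PredictorEngine::Clone` takes the other route: instead of rebuilding from scratch it delegates to `AnalysisPredictor::Clone` and wraps the cloned predictor with the new pass-through constructor, keeping the scope and `FunctionInfo` shared. An illustrative reduction of that delegation shape (names are stand-ins):

// Illustrative delegation: the engine clones only its inner predictor and
// shares everything else.
#include <memory>

struct Predictor {
  std::unique_ptr<Predictor> Clone() {
    return std::make_unique<Predictor>(*this);
  }
};

class WrapperEngine {
 public:
  explicit WrapperEngine(std::shared_ptr<Predictor> p)
      : predictor_(std::move(p)) {}

  std::unique_ptr<WrapperEngine> Clone() {
    // Shared, read-only state (scope, program info) would also be passed
    // along here; only the predictor is duplicated.
    return std::make_unique<WrapperEngine>(predictor_->Clone());
  }

 private:
  std::shared_ptr<Predictor> predictor_;
};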
paddle/fluid/jit/engine/predictor_engine.h
@@ -20,6 +20,7 @@
 
 namespace paddle {
 class AnalysisPredictor;
+class PaddlePredictor;
 
 namespace framework {
 class Scope;
@@ -33,11 +34,19 @@ class PredictorEngine : public BaseEngine {
                   const VariableMap &params_dict,
                   const phi::Place &place);
 
+  PredictorEngine(const std::shared_ptr<FunctionInfo> &info,
+                  const std::shared_ptr<framework::Scope> &scope,
+                  const phi::Place &place,
+                  const std::shared_ptr<PaddlePredictor> &predictor);
+
   ~PredictorEngine() noexcept {}
 
-  std::vector<Tensor> operator()(const std::vector<Tensor> &inputs);
+  std::vector<Tensor> operator()(const std::vector<Tensor> &inputs) override;
 
-  std::vector<DenseTensor> operator()(const std::vector<DenseTensor> &inputs);
+  std::vector<DenseTensor> operator()(
+      const std::vector<DenseTensor> &inputs) override;
+
+  std::unique_ptr<BaseEngine> Clone(void *stream = nullptr) override;
 
  private:
   std::shared_ptr<FunctionInfo> info_;
...
paddle/fluid/jit/layer.cc
@@ -30,7 +30,10 @@ Layer::Layer(const VariableMap& params_map,
              const VariableMap& attrs_map,
              const FunctionInfoMap& info_map,
              const phi::Place& place)
-    : params_map_(params_map), attrs_map_(attrs_map), info_map_(info_map) {
+    : params_map_(params_map),
+      attrs_map_(attrs_map),
+      info_map_(info_map),
+      place_(place) {
   unit_.reset(new CompilationUnit());
 }
@@ -94,5 +97,12 @@ PD_SPECIALZE_ATTRIBUTE_TYPE(std::vector<int>)
 PD_SPECIALZE_ATTRIBUTE_TYPE(std::vector<float>)
 PD_SPECIALZE_ATTRIBUTE_TYPE(std::vector<std::string>)
 
+std::shared_ptr<Layer> Layer::Clone(void* stream) {
+  std::shared_ptr<Layer> x =
+      std::make_shared<Layer>(params_map_, attrs_map_, info_map_, place_);
+  x->unit_ = unit_->Clone(stream);
+  return x;
+}
+
 }  // namespace jit
 }  // namespace paddle
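`Layer::Clone` ties the pieces together: it copies the shared parameter/attribute maps into a fresh `Layer` and swaps in a cloned `CompilationUnit`, giving each clone independent execution state over shared weights. This is what makes the jit layer usable from multiple threads; a minimal sketch mirroring the new tests below (`PrepareInputs` is the test file's helper, assumed to be in scope):

// Sketch: load a Layer once, Clone it once per worker thread.
// Uses the exported test program path from layer_test.cc.
#include <thread>
#include <vector>
#include "paddle/fluid/jit/layer.h"
#include "paddle/fluid/jit/serializer.h"

void RunMultiThread() {
  auto place = phi::CPUPlace();
  auto layer = paddle::jit::Load("./multi_program_load/export", place);

  std::vector<std::thread> workers;
  for (int i = 0; i < 4; ++i) {
    auto local = layer.Clone();  // independent engines, shared weights
    workers.emplace_back([local, place] {
      auto inputs = PrepareInputs(place);  // test helper; assumed available
      auto outs = local->forward(inputs);
      (void)outs;
    });
  }
  for (auto &t : workers) t.join();
}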
paddle/fluid/jit/layer.h
@@ -67,10 +67,13 @@ class Layer {
 
   std::vector<std::string> FunctionNames() const;
 
+  std::shared_ptr<Layer> Clone(void* stream = nullptr);
+
  private:
   VariableMap params_map_;
   VariableMap attrs_map_;
   FunctionInfoMap info_map_;
+  phi::Place place_;
   std::shared_ptr<CompilationUnit> unit_;
 };
...
paddle/fluid/jit/layer_test.cc
@@ -20,6 +20,7 @@
 
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/variable.h"
+#include "paddle/fluid/platform/timer.h"
 #include "paddle/phi/api/include/api.h"
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/kernel_registry.h"
@@ -78,7 +79,11 @@ TEST(CpuLayerTest, Function) {
 
 TEST(CpuLayerTest, Construct) {
   auto place = phi::CPUPlace();
   std::string path = "./multi_program_load/export";
+  paddle::platform::Timer timer;
+  timer.Start();
   auto layer = jit::Load(path, place);
+  timer.Pause();
+  std::cout << "jit::Load cost " << timer.ElapsedMS() << " ms" << std::endl;
 
   float fbias = layer.Attribute<float>("fbias");
   EXPECT_FLOAT_EQ(fbias, 1.4);
@@ -119,6 +124,41 @@ TEST(CpuLayerTest, Construct) {
   EXPECT_NEAR(out_data[0], pow(1.41562390, 2.0), 1e-6);
 }
 
+TEST(CpuLayerTest, Clone) {
+  auto place = phi::CPUPlace();
+  std::string path = "./multi_program_load/export";
+
+  paddle::platform::Timer timer;
+  timer.Start();
+  auto layer = jit::Load(path, place);
+  timer.Pause();
+  std::cout << "jit::Load cost " << timer.ElapsedMS() << " ms" << std::endl;
+
+  timer.Start();
+  auto layer2 = layer.Clone();
+  timer.Pause();
+  std::cout << "jit::Layer::Clone cost " << timer.ElapsedMS() << " ms"
+            << std::endl;
+
+  float fbias = layer2->Attribute<float>("fbias");
+  EXPECT_FLOAT_EQ(fbias, 1.4);
+
+  auto inputs = PrepareInputs(place);
+  auto outs = layer2->forward(inputs);
+  auto out_data = outs[0].data<float>();
+  EXPECT_NEAR(out_data[0], 0.02194316, 1e-6);
+
+  auto func = layer2->Function("infer");
+  EXPECT_TRUE(func.IsValid());
+  outs = func(inputs);
+  out_data = outs[0].data<float>();
+  EXPECT_NEAR(out_data[0], 1.41562390, 1e-6);
+
+  auto pow_out =
+      paddle::experimental::pow(outs[0], paddle::experimental::Scalar(2));
+  out_data = pow_out.data<float>();
+  EXPECT_NEAR(out_data[0], pow(1.41562390, 2.0), 1e-6);
+}
+
 #if defined(PADDLE_WITH_CUDA)
 TEST(GpuLayerTest, Construct) {
   auto place = phi::GPUPlace();
@@ -147,6 +187,22 @@ TEST(GpuLayerTest, Construct) {
   out_data = cpu_tensor.data<float>();
   EXPECT_NEAR(out_data[0], sqrt(1.41562390), 1e-6);
 }
 
+TEST(GpuLayerTest, Clone) {
+  auto place = phi::GPUPlace();
+  std::string path = "./multi_program_load/export";
+
+  auto layer = jit::Load(path, place);
+  auto inputs = PrepareInputs(place);
+
+  auto layer2 = layer.Clone();
+  auto outs = layer2->forward(inputs);
+  auto gpu_tensor = outs[0];
+  auto cpu_tensor =
+      paddle::experimental::copy_to(gpu_tensor, phi::CPUPlace(), true);
+  auto out_data = cpu_tensor.data<float>();
+  EXPECT_NEAR(out_data[0], 0.02194316, 1e-6);
+}
+
 #endif
 
 }  // namespace jit
...
paddle/fluid/jit/serializer.cc
@@ -30,8 +30,10 @@ DECLARE_string(jit_engine_type);
 
 namespace paddle {
 namespace jit {
 
+using FunctionInfoMap =
+    std::unordered_map<std::string, std::shared_ptr<FunctionInfo>>;
+
 Layer Deserializer::operator()(const std::string& path,
                                const phi::Place& place) {
   const auto& pdmodel_paths = utils::PdmodelFilePaths(path);
...