Unverified commit 9fa2eb38 authored by Hui Zhang, committed by GitHub

jit layer support multi thread and fix predictor clone (#50095)

* jit layer support multi thread

* fix bug

* cloned predictor does not run the graph optimizer

* format

* fix comment and format

* fix override and format

* fix

* fix
Parent c62657b3
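
In short, the patch makes paddle::jit::Layer and its engines clonable so that each worker thread can run inference on its own copy. A hypothetical usage sketch, not part of this diff (the header paths and the PrepareInputs helper are assumptions, mirroring the tests further below):

#include <string>
#include <thread>
#include <vector>
#include "paddle/fluid/jit/layer.h"       // assumed header locations
#include "paddle/fluid/jit/serializer.h"

std::vector<paddle::Tensor> PrepareInputs(const phi::Place &place);  // hypothetical helper, as in the tests

void RunOnWorkerThreads(const std::string &path) {
  auto place = phi::CPUPlace();
  auto layer = paddle::jit::Load(path, place);  // load and build engines once
  std::vector<std::thread> workers;
  for (int i = 0; i < 4; ++i) {
    workers.emplace_back([&layer, &place] {
      auto local = layer.Clone();          // per-thread copy; engines are cloned too
      auto inputs = PrepareInputs(place);
      auto outs = local->forward(inputs);  // safe: each thread owns its clone
    });
  }
  for (auto &w : workers) w.join();
}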
......@@ -1086,6 +1086,7 @@ bool AnalysisPredictor::GetFetch(std::vector<PaddleTensor> *outputs,
}
void AnalysisPredictor::PrepareArgument() {
VLOG(3) << "AnalysisPredictor::PrepareArgument";
// Init std::unique_ptr argument_.
argument_.reset(new Argument);
argument_->SetUseGPU(config_.use_gpu());
......@@ -2246,10 +2247,12 @@ AnalysisPredictor::~AnalysisPredictor() {
}
std::unique_ptr<PaddlePredictor> AnalysisPredictor::Clone(void *stream) {
VLOG(3) << "AnalysisPredictor::Clone";
std::lock_guard<std::mutex> lk(clone_mutex_);
auto *x = new AnalysisPredictor(config_);
x->status_is_cloned_ = true;
x->root_predictor_id_ = this->root_predictor_id_;
x->config_.apply_optim_ = false;
if (config_.use_external_stream_ && stream == nullptr) {
PADDLE_THROW(platform::errors::InvalidArgument(
"config has been configured to use external stream, but the Clone "
......
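
For context: the new std::lock_guard makes concurrent Clone() calls safe, and apply_optim_ = false means a cloned predictor reuses the already-optimized program instead of re-running graph optimization. A hypothetical sketch with the public inference API, not part of this diff (the paddle_infer wrapper and header path are assumptions):

#include <thread>
#include <vector>
#include "paddle_inference_api.h"

void ServeConcurrently(const paddle_infer::Config &config) {
  auto root = paddle_infer::CreatePredictor(config);  // graph optimization runs once here
  std::vector<std::thread> workers;
  for (int i = 0; i < 4; ++i) {
    workers.emplace_back([&root] {
      auto local = root->Clone();  // serialized by clone_mutex_; skips re-optimization
      // ... bind inputs and call local->Run() on this thread ...
    });
  }
  for (auto &w : workers) w.join();
}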
......@@ -38,5 +38,13 @@ void CompilationUnit::SetEngine(const std::string &name,
const jit::EngineMap &CompilationUnit::EngineMap() const { return engine_map_; }
std::shared_ptr<CompilationUnit> CompilationUnit::Clone(void *stream) {
auto x = std::make_shared<CompilationUnit>();
for (auto &it : engine_map_) {
x->SetEngine(it.first, it.second->Clone(stream));
}
return x;
}
} // namespace jit
} // namespace paddle
......@@ -36,6 +36,8 @@ class CompilationUnit {
const jit::EngineMap &EngineMap() const;
std::shared_ptr<CompilationUnit> Clone(void *stream = nullptr);
private:
jit::EngineMap engine_map_;
};
......
......@@ -29,6 +29,8 @@ class BaseEngine {
virtual std::vector<Tensor> operator()(const std::vector<Tensor> &inputs) = 0;
virtual std::unique_ptr<BaseEngine> Clone(void *stream = nullptr) = 0;
virtual ~BaseEngine() {}
};
......
......@@ -28,14 +28,14 @@ namespace jit {
InterpreterEngine::InterpreterEngine(const std::shared_ptr<FunctionInfo> &info,
const VariableMap &params_dict,
const phi::Place &place)
: info_(info), place_(place) {
: info_(info), params_dict_(params_dict), place_(place) {
info_->RemoveDescFeedFetch();
PADDLE_ENFORCE_GT(
static_cast<int64_t>(info_->ProgramDesc().Block(0).OpSize()),
0,
platform::errors::PreconditionNotMet(
"There is no operator in ProgramDesc."));
utils::ShareParamsIntoScope(info_->ParamNames(), params_dict, &scope_);
utils::ShareParamsIntoScope(info_->ParamNames(), params_dict_, &scope_);
VLOG(6) << framework::GenScopeTreeDebugInfo(&scope_);
CreateInterpreterCore();
}
......@@ -98,5 +98,10 @@ const std::shared_ptr<FunctionInfo> &InterpreterEngine::Info() const {
return info_;
}
std::unique_ptr<BaseEngine> InterpreterEngine::Clone(void *stream) {
auto *x = new InterpreterEngine(info_, params_dict_, place_);
return std::unique_ptr<BaseEngine>(x);
}
} // namespace jit
} // namespace paddle
......@@ -43,14 +43,18 @@ class InterpreterEngine : public BaseEngine {
void CreateInterpreterCore();
std::vector<Tensor> operator()(const std::vector<Tensor> &inputs);
std::vector<Tensor> operator()(const std::vector<Tensor> &inputs) override;
std::vector<DenseTensor> operator()(const std::vector<DenseTensor> &inputs);
std::vector<DenseTensor> operator()(
const std::vector<DenseTensor> &inputs) override;
const std::shared_ptr<FunctionInfo> &Info() const;
std::unique_ptr<BaseEngine> Clone(void *stream = nullptr) override;
private:
std::shared_ptr<FunctionInfo> info_;
VariableMap params_dict_;
framework::Scope scope_;
phi::Place place_;
std::shared_ptr<framework::InterpreterCore> inner_interpreter_;
......
......@@ -55,6 +55,17 @@ PredictorEngine::PredictorEngine(const std::shared_ptr<FunctionInfo> &info,
scope_, std::make_shared<framework::ProgramDesc>(info_->ProgramDesc()));
}
PredictorEngine::PredictorEngine(
const std::shared_ptr<FunctionInfo> &info,
const std::shared_ptr<framework::Scope> &scope,
const phi::Place &place,
const std::shared_ptr<PaddlePredictor> &predictor)
: info_(info),
scope_(scope),
place_(place),
predictor_(std::dynamic_pointer_cast<AnalysisPredictor, PaddlePredictor>(
predictor)) {}
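
One caveat: the std::dynamic_pointer_cast in this new constructor silently yields a null predictor_ if the caller passes a PaddlePredictor that is not an AnalysisPredictor. A hypothetical hardening sketch, not part of this commit, that could fail fast inside the constructor body:

// Assumed addition: reject non-AnalysisPredictor instances up front.
PADDLE_ENFORCE_NOT_NULL(
    predictor_,
    platform::errors::InvalidArgument(
        "PredictorEngine requires an AnalysisPredictor instance."));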
std::vector<Tensor> PredictorEngine::operator()(
const std::vector<Tensor> &inputs) {
auto dense_tensors = utils::ToDenseTensors(inputs);
......@@ -188,5 +199,11 @@ static bool PaddleTensorToDenseTensor(const PaddleTensor &pt,
return true;
}
std::unique_ptr<BaseEngine> PredictorEngine::Clone(void *stream) {
auto *x = new PredictorEngine(
info_, scope_, place_, predictor_->Clone(stream));
return std::unique_ptr<BaseEngine>(x);
}
} // namespace jit
} // namespace paddle
......@@ -20,6 +20,7 @@
namespace paddle {
class AnalysisPredictor;
class PaddlePredictor;
namespace framework {
class Scope;
......@@ -33,11 +34,19 @@ class PredictorEngine : public BaseEngine {
const VariableMap &params_dict,
const phi::Place &place);
PredictorEngine(const std::shared_ptr<FunctionInfo> &info,
const std::shared_ptr<framework::Scope> &scope,
const phi::Place &place,
const std::shared_ptr<PaddlePredictor> &predictor);
~PredictorEngine() noexcept {}
std::vector<Tensor> operator()(const std::vector<Tensor> &inputs);
std::vector<Tensor> operator()(const std::vector<Tensor> &inputs) override;
std::vector<DenseTensor> operator()(
const std::vector<DenseTensor> &inputs) override;
std::vector<DenseTensor> operator()(const std::vector<DenseTensor> &inputs);
std::unique_ptr<BaseEngine> Clone(void *stream = nullptr) override;
private:
std::shared_ptr<FunctionInfo> info_;
......
......@@ -30,7 +30,10 @@ Layer::Layer(const VariableMap& params_map,
const VariableMap& attrs_map,
const FunctionInfoMap& info_map,
const phi::Place& place)
: params_map_(params_map), attrs_map_(attrs_map), info_map_(info_map) {
: params_map_(params_map),
attrs_map_(attrs_map),
info_map_(info_map),
place_(place) {
unit_.reset(new CompilationUnit());
}
......@@ -94,5 +97,12 @@ PD_SPECIALZE_ATTRIBUTE_TYPE(std::vector<int>)
PD_SPECIALZE_ATTRIBUTE_TYPE(std::vector<float>)
PD_SPECIALZE_ATTRIBUTE_TYPE(std::vector<std::string>)
std::shared_ptr<Layer> Layer::Clone(void* stream) {
std::shared_ptr<Layer> x =
std::make_shared<Layer>(params_map_, attrs_map_, info_map_, place_);
x->unit_ = unit_->Clone(stream);
return x;
}
} // namespace jit
} // namespace paddle
......@@ -67,10 +67,13 @@ class Layer {
std::vector<std::string> FunctionNames() const;
std::shared_ptr<Layer> Clone(void* stream = nullptr);
private:
VariableMap params_map_;
VariableMap attrs_map_;
FunctionInfoMap info_map_;
phi::Place place_;
std::shared_ptr<CompilationUnit> unit_;
};
......
......@@ -20,6 +20,7 @@
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/variable.h"
#include "paddle/fluid/platform/timer.h"
#include "paddle/phi/api/include/api.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
......@@ -78,7 +79,11 @@ TEST(CpuLayerTest, Function) {
TEST(CpuLayerTest, Construct) {
auto place = phi::CPUPlace();
std::string path = "./multi_program_load/export";
paddle::platform::Timer timer;
timer.Start();
auto layer = jit::Load(path, place);
timer.Pause();
std::cout << "jit::Load coast" << timer.ElapsedMS() << std::endl;
float fbias = layer.Attribute<float>("fbias");
EXPECT_FLOAT_EQ(fbias, 1.4);
......@@ -119,6 +124,41 @@ TEST(CpuLayerTest, Construct) {
EXPECT_NEAR(out_data[0], pow(1.41562390, 2.0), 1e-6);
}
TEST(CpuLayerTest, Clone) {
auto place = phi::CPUPlace();
std::string path = "./multi_program_load/export";
paddle::platform::Timer timer;
timer.Start();
auto layer = jit::Load(path, place);
timer.Pause();
std::cout << "jit::Load cost " << timer.ElapsedMS() << " ms" << std::endl;
timer.Start();
auto layer2 = layer.Clone();
timer.Pause();
std::cout << "jit::Layer::Clone cost " << timer.ElapsedMS() << " ms"
<< std::endl;
float fbias = layer2->Attribute<float>("fbias");
EXPECT_FLOAT_EQ(fbias, 1.4);
auto inputs = PrepareInputs(place);
auto outs = layer2->forward(inputs);
auto out_data = outs[0].data<float>();
EXPECT_NEAR(out_data[0], 0.02194316, 1e-6);
auto func = layer2->Function("infer");
EXPECT_TRUE(func.IsValid());
outs = func(inputs);
out_data = outs[0].data<float>();
EXPECT_NEAR(out_data[0], 1.41562390, 1e-6);
auto pow_out =
paddle::experimental::pow(outs[0], paddle::experimental::Scalar(2));
out_data = pow_out.data<float>();
EXPECT_NEAR(out_data[0], pow(1.41562390, 2.0), 1e-6);
}
#if defined(PADDLE_WITH_CUDA)
TEST(GpuLayerTest, Construct) {
auto place = phi::GPUPlace();
......@@ -147,6 +187,22 @@ TEST(GpuLayerTest, Construct) {
out_data = cpu_tensor.data<float>();
EXPECT_NEAR(out_data[0], sqrt(1.41562390), 1e-6);
}
TEST(GpuLayerTest, Clone) {
auto place = phi::GPUPlace();
std::string path = "./multi_program_load/export";
auto layer = jit::Load(path, place);
auto inputs = PrepareInputs(place);
auto layer2 = layer.Clone();
auto outs = layer2->forward(inputs);
auto gpu_tensor = outs[0];
auto cpu_tensor =
paddle::experimental::copy_to(gpu_tensor, phi::CPUPlace(), true);
auto out_data = cpu_tensor.data<float>();
EXPECT_NEAR(out_data[0], 0.02194316, 1e-6);
}
#endif
} // namespace jit
......
......@@ -30,8 +30,10 @@ DECLARE_string(jit_engine_type);
namespace paddle {
namespace jit {
using FunctionInfoMap =
std::unordered_map<std::string, std::shared_ptr<FunctionInfo>>;
Layer Deserializer::operator()(const std::string& path,
const phi::Place& place) {
const auto& pdmodel_paths = utils::PdmodelFilePaths(path);
......