Unverified commit d008260f authored by: Shi Xiaowei, committed by: GitHub

update the initialization of anakin subgraph (#17880)

test=develop
Parent ae576f3c
......@@ -32,18 +32,26 @@ namespace paddle {
namespace inference {
namespace anakin {
// Out-of-class definition of the static std::once_flag declared inside
// AnakinEngine. One flag exists per template specialization; it guards the
// one-time Anakin device/environment initialization performed via
// std::call_once in the constructor.
// NOTE(review): `extern` on a static data member definition is unusual —
// presumably intended to suppress duplicate instantiations across translation
// units; confirm against the build setup.
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
extern std::once_flag
AnakinEngine<TargetT, PrecisionType, RunType>::init_anakin_;
// Constructs an AnakinEngine for the given device and shape limits.
//
// @param need_summary       forwarded to the Anakin net (enables its summary).
// @param device             device id bound to the Anakin runtime.
// @param max_batch_size     maximum batch size the engine is built for.
// @param max_input_shape    per-input maximum shapes, keyed by input name.
// @param program_inputs     names of the program's input tensors.
// @param auto_config_layout forwarded flag; always false in GPU mode but true
//                           in most CPU cases (see header).
//
// The diff scrape had left BOTH the old assignment-style body and the new
// member-init-list here (two init lists, duplicated assignments) — invalid
// C++. This is the reconstructed post-change version.
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
AnakinEngine<TargetT, PrecisionType, RunType>::AnakinEngine(
    bool need_summary, int device, int max_batch_size,
    std::map<std::string, std::vector<int>> max_input_shape,
    std::vector<std::string> program_inputs, bool auto_config_layout)
    : device_(device),
      max_batch_size_(max_batch_size),
      // By-value sink parameters: move instead of copying.
      max_input_shape_(std::move(max_input_shape)),
      program_inputs_(std::move(program_inputs)),
      auto_config_layout_(auto_config_layout) {
  // Bind the device and initialize the Anakin environment exactly once per
  // process, no matter how many engines are created (init_anakin_ is a
  // static once_flag shared by all instances of this specialization).
  std::call_once(init_anakin_, [this]() {
    ::anakin::TargetWrapper<TargetT>::set_device(device_);
    ::anakin::Env<TargetT>::env_init();
  });
  // Create the graph/net only after the environment is initialized.
  graph_.reset(new AnakinGraphT<TargetT, PrecisionType>());
  net_.reset(new AnakinNetT<TargetT, PrecisionType, RunType>(need_summary));
}
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
......@@ -102,7 +110,7 @@ void AnakinEngine<TargetT, PrecisionType, RunType>::BindInput(
anakin_input = net_->get_in(input.first);
}
anakin_input->reshape(fluid_input_shape);
::anakin::saber::Tensor<TargetT> tmp_anakin_tensor(data, TargetT(), 0,
::anakin::saber::Tensor<TargetT> tmp_anakin_tensor(data, TargetT(), device_,
fluid_input_shape);
anakin_input->copy_from(tmp_anakin_tensor);
}
......
......@@ -114,12 +114,13 @@ class AnakinEngine {
private:
bool initialized_{false};
int device_;
int max_batch_size_;
std::map<std::string, std::vector<int>> max_input_shape_;
int device_;
std::vector<std::string> program_inputs_;
std::unique_ptr<GraphT> graph_;
std::unique_ptr<NetT> net_;
std::vector<std::string> program_inputs_;
static std::once_flag init_anakin_;
std::unordered_map<std::string, float> tensor_scales_;
// Always be false in gpu mode but true in most cpu cases.
bool auto_config_layout_;
......
......@@ -371,6 +371,7 @@ float AnalysisConfig::fraction_of_gpu_memory_for_pool() const {
// Get the GPU memory details and calculate the fraction of memory for the
// GPU memory pool.
size_t gpu_used, gpu_available;
platform::SetDeviceId(device_id_);
platform::GpuMemoryUsage(&gpu_used, &gpu_available);
double total_gpu_memory = (gpu_used + gpu_available) / 1024. / 1024.;
float fraction_of_gpu_memory =
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Register to comment