// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "paddle/fluid/inference/anakin/engine.h" #include #include #include #include #include "paddle/fluid/framework/ddim.h" using anakin::Precision; using anakin::OpRunType; using paddle::framework::LoDTensor; template using AnakinNetT = anakin::Net; template using AnakinGraphT = anakin::graph::Graph; namespace paddle { namespace inference { namespace anakin { template AnakinEngine::AnakinEngine(bool need_summary, int device, int max_batch_size) : graph_(new AnakinGraphT()), net_(new AnakinNetT(need_summary)) { device_ = device; max_batch_size_ = max_batch_size; } template AnakinEngine::~AnakinEngine() {} template void AnakinEngine::SetInputShape( const std::string &name, std::vector shape) { graph_->AddOpAttr<::anakin::PTuple>(name, "input_shape", std::move(shape)); } template void AnakinEngine::InitGraph() { net_->init(*graph_); } template void AnakinEngine::AddOp( const std::string &name, const std::string &type, const std::vector &inputs, const std::vector &outputs) { PADDLE_ENFORCE(graph_->AddOp(name, type, inputs, outputs), "Add operation."); } template void AnakinEngine::Execute( const std::map &inputs, const std::map &outputs, cudaStream_t stream) { for (const auto &input : inputs) { auto *tensor = input.second; auto *data = tensor->data(); auto fluid_input_shape = framework::vectorize2int(tensor->dims()); auto *anakin_input = net_->get_in(input.first); auto net_shape = anakin_input->shape(); if (tensor->numel() > net_shape.count()) { graph_->Reshape(input.first, fluid_input_shape); net_.reset(new AnakinNetT(true)); net_->init(*graph_); anakin_input = net_->get_in(input.first); } anakin_input->reshape(fluid_input_shape); net_shape = anakin_input->shape(); ::anakin::saber::Tensor tmp_anakin_tensor(data, TargetT(), 0, // net_shape); fluid_input_shape); anakin_input->copy_from(tmp_anakin_tensor); } cudaDeviceSynchronize(); net_->prediction(); for (const auto &output : outputs) { platform::CUDAPlace gpu_place(device_); auto *tensor = output.second; auto *anakin_output = net_->get_out(output.first); auto *anakin_data = anakin_output->data(); auto anakin_output_shape = anakin_output->valid_shape(); tensor->Resize(framework::make_ddim(anakin_output_shape)); auto *fluid_data = tensor->mutable_data(gpu_place); memory::Copy(gpu_place, static_cast(fluid_data), gpu_place, static_cast(anakin_data), tensor->numel() * sizeof(float), stream); } cudaDeviceSynchronize(); } template void AnakinEngine::Freeze() { PADDLE_ENFORCE(graph_->Freeze_v3(), "Freeze anakin subgraph."); } template void AnakinEngine::Optimize() { PADDLE_ENFORCE(graph_->Optimize(), "Graph optimization."); } template std::unique_ptr> AnakinEngine::Clone() { auto *engine = new AnakinEngine(); engine->net_ = std::move(net_->Clone()); return std::unique_ptr(engine); } template class AnakinEngine<::anakin::saber::NV, ::anakin::Precision::FP32>; } // namespace anakin } // namespace inference } // namespace paddle