// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/inference/api/api_anakin_engine.h"
#include <cuda.h>
#include <vector>

namespace paddle {

// Constructs a predictor directly from `config`.  Init() does the real
// work; CHECK aborts the process if it fails, so a successfully
// constructed predictor is always ready to Run().
PaddleInferenceAnakinPredictor::PaddleInferenceAnakinPredictor(
    const AnakinConfig &config) {
  CHECK(Init(config));
}

bool PaddleInferenceAnakinPredictor::Init(const AnakinConfig &config) {
C
cuichaowen 已提交
27 28 29 30 31 32 33 34 35 36
  if (!(graph_.load(config.model_file))) {
    return false;
  }
  graph_.ResetBatchSize("input_0", config.max_batch_size);
  // optimization for graph
  if (!(graph_.Optimize())) {
    return false;
  }
  // construct executer
  executor_.init(graph_);
Y
Yan Chunwei 已提交
37 38 39 40 41
  return true;
}

bool PaddleInferenceAnakinPredictor::Run(
    const std::vector<PaddleTensor> &inputs,
42
    std::vector<PaddleTensor> *output_data, int batch_size) {
Y
Yan Chunwei 已提交
43 44 45 46 47 48
  for (const auto &input : inputs) {
    if (input.dtype != PaddleDType::FLOAT32) {
      LOG(ERROR) << "Only support float type inputs. " << input.name
                 << "'s type is not float";
      return false;
    }
C
cuichaowen 已提交
49 50
    auto d_tensor_in_p = executor_.get_in(input.name);
    float *d_data_p = d_tensor_in_p->mutable_data();
51
    if (cudaMemcpy(d_data_p, static_cast<float *>(input.data.data()),
C
cuichaowen 已提交
52 53 54 55 56
                   d_tensor_in_p->valid_size() * sizeof(float),
                   cudaMemcpyHostToDevice) != 0) {
      LOG(ERROR) << "copy data from CPU to GPU error";
      return false;
    }
57
    cudaStreamSynchronize(NULL);
Y
Yan Chunwei 已提交
58 59
  }

C
cuichaowen 已提交
60
  executor_.prediction();
Y
Yan Chunwei 已提交
61 62 63 64 65 66

  if (output_data->empty()) {
    LOG(ERROR) << "At least one output should be set with tensors' names.";
    return false;
  }
  for (auto &output : *output_data) {
C
cuichaowen 已提交
67
    auto *tensor = executor_.get_out(output.name);
Y
Yan Chunwei 已提交
68
    output.shape = tensor->shape();
69 70 71
    if (output.data.length() < tensor->valid_size() * sizeof(float)) {
      output.data.Resize(tensor->valid_size() * sizeof(float));
    }
Y
Yan Chunwei 已提交
72
    // Copy data from GPU -> CPU
73
    if (cudaMemcpy(output.data.data(), tensor->mutable_data(),
C
cuichaowen 已提交
74
                   tensor->valid_size() * sizeof(float),
Y
Yan Chunwei 已提交
75 76 77 78
                   cudaMemcpyDeviceToHost) != 0) {
      LOG(ERROR) << "copy data from GPU to CPU error";
      return false;
    }
79
    cudaStreamSynchronize(NULL);
Y
Yan Chunwei 已提交
80 81 82 83
  }
  return true;
}

// Exposes the underlying Anakin executor so that Clone() can initialize
// a new predictor's executor from this predictor's graph (sharing the
// already-loaded network weights).
anakin::Net<anakin::NV, anakin::saber::AK_FLOAT, anakin::Precision::FP32>
    &PaddleInferenceAnakinPredictor::get_executer() {
  return executor_;
}

// the cloned new Predictor of anakin share the same net weights from original
// Predictor
Y
Yan Chunwei 已提交
91
std::unique_ptr<PaddlePredictor> PaddleInferenceAnakinPredictor::Clone() {
C
cuichaowen 已提交
92 93 94 95 96 97 98 99 100 101 102 103
  VLOG(3) << "Anakin Predictor::clone";
  std::unique_ptr<PaddlePredictor> cls(new PaddleInferenceAnakinPredictor());
  // construct executer from other graph
  auto anakin_predictor_p =
      dynamic_cast<PaddleInferenceAnakinPredictor *>(cls.get());
  if (!anakin_predictor_p) {
    LOG(ERROR) << "fail to call Init";
    return nullptr;
  }
  anakin_predictor_p->get_executer().init(graph_);

  return std::move(cls);
Y
Yan Chunwei 已提交
104 105 106 107
}

// A factory to help create difference predictor.
template <>
108 109
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
    AnakinConfig, PaddleEngineKind::kAnakin>(const AnakinConfig &config) {
C
cuichaowen 已提交
110
  VLOG(3) << "Anakin Predictor create.";
Y
Yan Chunwei 已提交
111 112 113
  std::unique_ptr<PaddlePredictor> x(
      new PaddleInferenceAnakinPredictor(config));
  return x;
L
Luo Tao 已提交
114
}

}  // namespace paddle