// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/inference/api/api_anakin_engine.h"
#include <cuda.h>
#include <cuda_runtime.h>
#include <algorithm>
#include <vector>

namespace paddle {

template <typename Target>
PaddleInferenceAnakinPredictor<Target>::PaddleInferenceAnakinPredictor(
    const AnakinConfig &config) {
  CHECK(Init(config));
}

template <typename Target>
bool PaddleInferenceAnakinPredictor<Target>::Init(const AnakinConfig &config) {
  if (!(graph_.load(config.model_file))) {
    LOG(ERROR) << "failed to load graph from " << config.model_file;
    return false;
  }
  auto inputs = graph_.get_ins();
  for (auto &input_str : inputs) {
    graph_.ResetBatchSize(input_str, config.max_batch_size);
  }
  // optimize the graph
  if (!(graph_.Optimize())) {
    return false;
  }
  // construct the executor
  if (executor_p_ == nullptr) {
    executor_p_ = new anakin::Net<Target, anakin::saber::AK_FLOAT,
                                  anakin::Precision::FP32>(graph_, true);
  }
  return true;
}

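// Run() copies each FLOAT32 input to the device, executes the network once,
// and copies every requested output back to the host.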
template <typename Target>
bool PaddleInferenceAnakinPredictor<Target>::Run(
    const std::vector<PaddleTensor> &inputs,
    std::vector<PaddleTensor> *output_data, int batch_size) {
  for (const auto &input : inputs) {
    if (input.dtype != PaddleDType::FLOAT32) {
      LOG(ERROR) << "only float (FLOAT32) inputs are supported; " << input.name
                 << "'s type is not float";
      return false;
    }
    auto d_tensor_in_p = executor_p_->get_in(input.name);
    auto net_shape = d_tensor_in_p->valid_shape();
    if (net_shape.size() != input.shape.size()) {
      LOG(ERROR) << "input " << input.name
                 << "'s shape size should be equal to that of net";
      return false;
    }
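    // Grow-only reallocation: rebuild the graph and the executor only when
    // the requested input volume exceeds the network's current capacity;
    // smaller inputs just reshape the existing device tensor below.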
    int volume = 1;
    std::for_each(input.shape.begin(), input.shape.end(),
                  [&](int n) { volume *= n; });
    if (volume > net_shape.count()) {
      graph_.Reshape(input.name, input.shape);
      delete executor_p_;
      executor_p_ = new anakin::Net<Target, anakin::saber::AK_FLOAT,
                                    anakin::Precision::FP32>(graph_, true);
      d_tensor_in_p = executor_p_->get_in(input.name);
    }

    anakin::saber::Shape tmp_shape;
    for (auto s : input.shape) {
      tmp_shape.push_back(s);
    }
    d_tensor_in_p->reshape(tmp_shape);

    float *d_data_p = d_tensor_in_p->mutable_data();
    if (cudaMemcpy(d_data_p, static_cast<float *>(input.data.data()),
                   d_tensor_in_p->valid_size() * sizeof(float),
                   cudaMemcpyHostToDevice) != cudaSuccess) {
      LOG(ERROR) << "failed to copy input data from CPU to GPU";
      return false;
    }
    cudaStreamSynchronize(NULL);
  }
  cudaDeviceSynchronize();
  executor_p_->prediction();
  cudaDeviceSynchronize();

  if (output_data->empty()) {
    LOG(ERROR) << "At least one output should be set with tensors' names.";
    return false;
  }
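  // Fetch each requested output from the executor and copy it back to host
  // memory, resizing the host buffer if it is too small.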
  for (auto &output : *output_data) {
    auto *tensor = executor_p_->get_out(output.name);
    output.shape = tensor->valid_shape();
    if (output.data.length() < tensor->valid_size() * sizeof(float)) {
      output.data.Resize(tensor->valid_size() * sizeof(float));
    }
    // Copy data from GPU -> CPU
    if (cudaMemcpy(output.data.data(), tensor->mutable_data(),
                   tensor->valid_size() * sizeof(float),
                   cudaMemcpyDeviceToHost) != cudaSuccess) {
      LOG(ERROR) << "failed to copy output data from GPU to CPU";
      return false;
    }
    cudaStreamSynchronize(NULL);
  }
  return true;
}

template <typename Target>
anakin::Net<Target, anakin::saber::AK_FLOAT, anakin::Precision::FP32>
    &PaddleInferenceAnakinPredictor<Target>::get_executer() {
  return *executor_p_;
}

// The cloned Anakin predictor shares the network weights of the original
// predictor.
template <typename Target>
std::unique_ptr<PaddlePredictor>
PaddleInferenceAnakinPredictor<Target>::Clone() {
  VLOG(3) << "Anakin Predictor::clone";
  std::unique_ptr<PaddlePredictor> cls(
      new PaddleInferenceAnakinPredictor<Target>());
  // construct the executor from the original graph
  auto anakin_predictor_p =
      dynamic_cast<PaddleInferenceAnakinPredictor<Target> *>(cls.get());
  if (!anakin_predictor_p) {
    LOG(ERROR) << "failed to cast the cloned predictor to "
                  "PaddleInferenceAnakinPredictor";
    return nullptr;
  }
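  // Initialize the cloned executor from this predictor's graph so that the
  // network weights are shared rather than copied.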
  anakin_predictor_p->get_executer().init(graph_);

  return std::move(cls);
}

template class PaddleInferenceAnakinPredictor<anakin::NV>;
template class PaddleInferenceAnakinPredictor<anakin::X86>;

// A factory to help create predictors for the different targets.
template <>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
    AnakinConfig, PaddleEngineKind::kAnakin>(const AnakinConfig &config) {
  VLOG(3) << "Anakin Predictor create.";
  if (config.target_type == AnakinConfig::NVGPU) {
    VLOG(3) << "Anakin Predictor create on [ NVIDIA GPU ].";
    std::unique_ptr<PaddlePredictor> x(
        new PaddleInferenceAnakinPredictor<anakin::NV>(config));
    return x;
  } else if (config.target_type == AnakinConfig::X86) {
    VLOG(3) << "Anakin Predictor create on [ Intel X86 ].";
    std::unique_ptr<PaddlePredictor> x(
        new PaddleInferenceAnakinPredictor<anakin::X86>(config));
    return x;
  } else {
    VLOG(3) << "Anakin Predictor create on unknown platform.";
    return nullptr;
  }
}
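
// A minimal usage sketch (the config fields and the factory call are the ones
// used above; the model path and tensor names are placeholders):
//
//   AnakinConfig config;
//   config.model_file = "model.anakin.bin";  // placeholder path
//   config.max_batch_size = 1;
//   config.target_type = AnakinConfig::NVGPU;
//   auto predictor =
//       CreatePaddlePredictor<AnakinConfig, PaddleEngineKind::kAnakin>(config);
//   std::vector<PaddleTensor> inputs = ...;  // FLOAT32 tensors, names set
//   std::vector<PaddleTensor> outputs(1);
//   outputs[0].name = "prob";  // placeholder output name
//   predictor->Run(inputs, &outputs);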

}  // namespace paddle