// api_impl.cc

/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <glog/logging.h>
#include <algorithm>
#include <map>
#include <memory>
#include <set>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/inference/api/api_impl.h"
#include "paddle/fluid/inference/api/details/reset_tensor_array.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/platform/cpu_helper.h"
#include "paddle/fluid/platform/profiler.h"

DEFINE_bool(profile, false, "Turn on profiler for fluid");

namespace paddle {
namespace {
using paddle::inference::Timer;

template <class T>
std::string num2str(T a) {
  std::stringstream istr;
  istr << a;
  return istr.str();
}
}  // namespace

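// Scan block 0 of the inference program and index its feed/fetch ops by the
// "col" attribute, so inputs and outputs can later be bound by position (and,
// for feeds, by name).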
void NativePaddlePredictor::PrepareFeedFetch() {
  for (auto *op : inference_program_->Block(0).AllOps()) {
    if (op->Type() == "feed") {
      int idx = BOOST_GET_CONST(int, op->GetAttr("col"));
      if (feeds_.size() <= static_cast<size_t>(idx)) {
        feeds_.resize(idx + 1);
      }
      feeds_[idx] = op;
      feed_names_[op->Output("Out")[0]] = idx;
    } else if (op->Type() == "fetch") {
      int idx = BOOST_GET_CONST(int, op->GetAttr("col"));
      if (fetchs_.size() <= static_cast<size_t>(idx)) {
        fetchs_.resize(idx + 1);
      }
      fetchs_[idx] = op;
    }
  }
}

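// One-time setup: optionally enable the profiler, configure CPU math threads,
// pick the execution place, create (or attach to) a scope, load the program
// and parameters, and prepare the executor context.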
bool NativePaddlePredictor::Init(
    std::shared_ptr<framework::Scope> parent_scope) {
  VLOG(3) << "Predictor::init()";
  if (FLAGS_profile) {
    LOG(WARNING) << "Profiler is activated, which might affect the performance";
    LOG(INFO) << "You can turn it off by setting the gflag '-profile false'";

    auto tracking_device = config_.use_gpu ? platform::ProfilerState::kAll
                                           : platform::ProfilerState::kCPU;
    platform::EnableProfiler(tracking_device);
  }

  // no matter with or without MKLDNN
  paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());

  if (config_.use_gpu) {
    place_ = paddle::platform::CUDAPlace(config_.device);
  } else {
    place_ = paddle::platform::CPUPlace();
  }
  if (parent_scope) {
    scope_ = parent_scope;
    sub_scope_ = &(parent_scope->NewScope());
    PADDLE_ENFORCE_NOT_NULL(sub_scope_, "create sub scope fail");
  } else {
    paddle::framework::InitDevices(false);
    scope_.reset(new paddle::framework::Scope());
  }

  executor_.reset(new paddle::framework::Executor(place_));

  // Initialize the inference program
  if (!config_.model_dir.empty()) {
    // Parameters are saved in separate files located in
    // the specified `dirname`.
    inference_program_ = paddle::inference::Load(executor_.get(), scope_.get(),
                                                 config_.model_dir);
  } else if (!config_.prog_file.empty() && !config_.param_file.empty()) {
    // All parameters are saved in a single file.
    // The file names should be consistent with those used
    // in Python API `fluid.io.save_inference_model`.
    inference_program_ = paddle::inference::Load(
        executor_.get(), scope_.get(), config_.prog_file, config_.param_file);
  } else {
    LOG(ERROR) << "fail to load inference model from " << config_.model_dir;
    return false;
  }

  ctx_ = executor_->Prepare(*inference_program_, 0);
  executor_->CreateVariables(*inference_program_,
                             sub_scope_ ? sub_scope_ : scope_.get(), 0);

  // Get the feed_target_names and fetch_target_names
  PrepareFeedFetch();
  return true;
}

NativePaddlePredictor::~NativePaddlePredictor() {
  if (FLAGS_profile) {
    platform::DisableProfiler(platform::EventSortingKey::kTotal,
                              "./profile.log");
  }
  if (sub_scope_) {
    scope_->DeleteScope(sub_scope_);
  }
}

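// Run one inference pass: copy the inputs into feed variables, execute the
// prepared context on the (sub-)scope, and convert the fetch variables back
// into PaddleTensor outputs.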
bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
                                std::vector<PaddleTensor> *output_data,
                                int batch_size) {
#ifndef PADDLE_ON_INFERENCE
  LOG_FIRST_N(WARNING, 5) << "The NaiveExecutor can not work properly if the "
                             "cmake flag ON_INFER is not set.";
  LOG_FIRST_N(WARNING, 5) << "Unlike the training phase, all the scopes and "
                             "variables will be reused to save the allocation "
                             "overhead.";
  LOG_FIRST_N(WARNING, 5) << "Please re-compile the inference library by "
                             "setting the cmake flag ON_INFER=ON if you are "
                             "running Paddle Inference";
#endif  // PADDLE_ON_INFERENCE
  if (UNLIKELY(config_.cpu_math_library_num_threads() > 1)) {
    paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
  }
  VLOG(3) << "Predictor::predict";
  Timer timer;
  timer.tic();
  // set feed variable
  framework::Scope *scope = sub_scope_ != nullptr ? sub_scope_ : scope_.get();
  if (!SetFeed(inputs, scope)) {
    LOG(ERROR) << "fail to set feed";
    return false;
  }
  // Run the inference program
  // if share variables, we need not create variables
  VLOG(4) << "Run prepared context";
  executor_->RunPreparedContext(ctx_.get(), scope,
                                false, /* don't create local scope each time*/
                                false /* don't create variable each time */);
  VLOG(4) << "Finish prepared context";
  // get fetch variable
  if (!GetFetch(output_data, scope)) {
    LOG(ERROR) << "fail to get fetches";
    return false;
  }
  VLOG(3) << "predict cost: " << timer.toc() << "ms";

  // Reset other vector-like containers that are not cleaned after each batch.
  tensor_array_batch_cleaner_.CollectNoTensorVars(scope_.get());
  tensor_array_batch_cleaner_.ResetNoTensorVars();
  return true;
}

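// Create another predictor that shares this one's configuration but owns a
// fresh scope; the mutex keeps concurrent Clone() calls safe.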
std::unique_ptr<PaddlePredictor> NativePaddlePredictor::Clone() {
  std::lock_guard<std::mutex> lk(clone_mutex_);
  VLOG(3) << "Predictor::clone";
  std::unique_ptr<PaddlePredictor> cls(new NativePaddlePredictor(config_));
  // Hot fix for the bug that results differ across threads.
  // TODO(Superjomn) re-implement a real clone here.
  PADDLE_ENFORCE_NOT_NULL(dynamic_cast<NativePaddlePredictor *>(cls.get()));
  if (!dynamic_cast<NativePaddlePredictor *>(cls.get())->Init(nullptr)) {
    LOG(ERROR) << "fail to call Init";
    return nullptr;
  }

#ifdef __clang__
  // fix clang compile error
  return cls;
#else
  // fix manylinux compile error.
  return std::move(cls);
#endif
}

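// Copy user-provided PaddleTensor inputs into the feed variables of the given
// scope, converting dtype, shape and LoD along the way.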
bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
                                    framework::Scope *scope) {
  VLOG(3) << "Predictor::set_feed";
  if (inputs.size() != feeds_.size()) {
    LOG(ERROR) << "wrong feed input size, need " << feeds_.size() << " but get "
               << inputs.size();
    return false;
  }

  // Cache the inputs memory for better concurrency performance.
  feed_tensors_.resize(inputs.size());

  for (size_t i = 0; i < inputs.size(); ++i) {
    auto &input = feed_tensors_[i];
    framework::DDim ddim = framework::make_ddim(inputs[i].shape);
    void *input_ptr;
    if (inputs[i].dtype == PaddleDType::INT64) {
      input_ptr = input.mutable_data<int64_t>(ddim, place_);
    } else if (inputs[i].dtype == PaddleDType::FLOAT32) {
      input_ptr = input.mutable_data<float>(ddim, place_);
    } else if (inputs[i].dtype == PaddleDType::INT32) {
      input_ptr = input.mutable_data<int32_t>(ddim, place_);
    } else {
      LOG(ERROR) << "unsupported feed type " << inputs[i].dtype;
      return false;
    }

    PADDLE_ENFORCE_NOT_NULL(input_ptr);
    PADDLE_ENFORCE_NOT_NULL(inputs[i].data.data());
    if (platform::is_cpu_place(place_)) {
      // TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy.
      std::memcpy(static_cast<void *>(input_ptr), inputs[i].data.data(),
                  inputs[i].data.length());
    } else {
#ifdef PADDLE_WITH_CUDA
      platform::DeviceContextPool &pool =
          platform::DeviceContextPool::Instance();
      auto *dev_ctx =
          static_cast<const platform::CUDADeviceContext *>(pool.Get(place_));
      auto dst_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, place_);
      memory::Copy(dst_gpu_place, static_cast<void *>(input_ptr),
                   platform::CPUPlace(), inputs[i].data.data(),
                   inputs[i].data.length(), dev_ctx->stream());
#else
      PADDLE_THROW("Not compile with CUDA, should not reach here.");
#endif
    }

    // TODO(Superjomn) Low performance, need optimization for heavy LoD copy.
    framework::LoD lod;
    for (auto &level : inputs[i].lod) {
      lod.emplace_back(level);
    }
    input.set_lod(lod);
    int idx = -1;
    if (config_.specify_input_name) {
      idx = feed_names_[inputs[i].name];
    } else {
      idx = BOOST_GET_CONST(int, feeds_[i]->GetAttr("col"));
    }
    framework::SetFeedVariable(scope, input, "feed", idx);
  }
  return true;
}
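
// Copy a single fetched LoDTensor into a PaddleTensor: shape, raw data (always
// in CPU memory after the fetch op) and LoD.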
template <typename T>
void NativePaddlePredictor::GetFetchOne(const framework::LoDTensor &fetch,
                                        PaddleTensor *output) {
  // set shape.
  auto shape = framework::vectorize(fetch.dims());
  output->shape.assign(shape.begin(), shape.end());
  // set data.
  const T *data = fetch.data<T>();
  int num_elems = inference::VecReduceToInt(shape);
  output->data.Resize(num_elems * sizeof(T));
  // The tensor produced by the fetch op should always be in CPU memory, so a
  // plain copy is enough.
  memcpy(output->data.data(), data, num_elems * sizeof(T));
  // set lod
  output->lod.clear();
  for (auto &level : fetch.lod()) {
    output->lod.emplace_back(level.begin(), level.end());
  }
}

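// Convert every fetch variable in the scope into a PaddleTensor output,
// dispatching on the tensor's element type.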
bool NativePaddlePredictor::GetFetch(std::vector<PaddleTensor> *outputs,
                                     framework::Scope *scope) {
  VLOG(3) << "Predictor::get_fetch";
  outputs->resize(fetchs_.size());
  for (size_t i = 0; i < fetchs_.size(); ++i) {
    int idx = BOOST_GET_CONST(int, fetchs_[i]->GetAttr("col"));
    PADDLE_ENFORCE((size_t)idx == i);
    framework::FetchType &fetch_var =
        framework::GetFetchVariable(*scope, "fetch", idx);
    auto fetch = BOOST_GET_CONST(framework::LoDTensor, fetch_var);
    auto type = fetch.type();
    auto output = &(outputs->at(i));
    output->name = fetchs_[idx]->Input("X")[0];
    if (type == framework::DataTypeTrait<float>::DataType()) {
      GetFetchOne<float>(fetch, output);
      output->dtype = PaddleDType::FLOAT32;
    } else if (type == framework::DataTypeTrait<int64_t>::DataType()) {
      GetFetchOne<int64_t>(fetch, output);
      output->dtype = PaddleDType::INT64;
    } else if (type == framework::DataTypeTrait<int32_t>::DataType()) {
      GetFetchOne<int32_t>(fetch, output);
      output->dtype = PaddleDType::INT32;
    } else {
      LOG(ERROR) << "unknown type, only float32, int64 and int32 are supported now.";
    }
  }
  return true;
}

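// Factory specialization for the native predictor. For GPU configs the memory
// fraction is validated and forwarded to gflags before Init() runs.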
template <>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
    NativeConfig, PaddleEngineKind::kNative>(const NativeConfig &config) {
  // TODO(NHZlX): Should add the link to the doc of
  // paddle_infer::CreatePredictor<paddle_infer::Config>
  VLOG(3) << "create NativePaddlePredictor";
  if (config.use_gpu) {
    // 1. GPU memory
    PADDLE_ENFORCE_GE(
        config.fraction_of_gpu_memory, 0.f,
        "fraction_of_gpu_memory in the config should be set to range (0., 1.]");
    PADDLE_ENFORCE_GE(config.device, 0, "Invalid device id %d", config.device);
    std::vector<std::string> flags;
    if (config.fraction_of_gpu_memory >= 0.0f ||
        config.fraction_of_gpu_memory <= 0.95f) {
      flags.push_back("dummy");
      std::string flag = "--fraction_of_gpu_memory_to_use=" +
                         num2str<float>(config.fraction_of_gpu_memory);
      flags.push_back(flag);
      VLOG(3) << "set flag: " << flag;
      framework::InitGflags(flags);
    }
  }

  std::unique_ptr<PaddlePredictor> predictor(new NativePaddlePredictor(config));
  PADDLE_ENFORCE_NOT_NULL(
      dynamic_cast<NativePaddlePredictor *>(predictor.get()));
  if (!dynamic_cast<NativePaddlePredictor *>(predictor.get())->Init(nullptr)) {
    return nullptr;
  }
#ifdef __clang__
  // fix clang compile error
  return predictor;
#else
  return std::move(predictor);
#endif
}

template <>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<NativeConfig>(
    const NativeConfig &config) {
  return CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
}

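// A minimal usage sketch of the API implemented above (illustrative only: the
// model path is made up, and the PaddleBuf::Resize()/data() calls are assumed
// from their use in GetFetchOne above):
//
//   paddle::NativeConfig config;
//   config.model_dir = "./my_model";  // or set prog_file + param_file
//   config.use_gpu = false;
//   auto predictor =
//       paddle::CreatePaddlePredictor<paddle::NativeConfig>(config);
//
//   std::vector<paddle::PaddleTensor> inputs(1), outputs;
//   inputs[0].shape = {1, 3, 224, 224};
//   inputs[0].dtype = paddle::PaddleDType::FLOAT32;
//   inputs[0].data.Resize(1 * 3 * 224 * 224 * sizeof(float));
//   // ... fill inputs[0].data.data() with float values, then:
//   predictor->Run(inputs, &outputs);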
}  // namespace paddle